Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7603bd7

Browse files
tswast, gcf-owl-bot[bot], chalmerlowe
authored
deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095)
* feat: use pandas-gbq to determine schema in `load_table_from_dataframe`
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* fix some unit tests
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* bump minimum pandas-gbq to 0.26.1
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* drop pandas-gbq from python 3.7 extras
* relax warning message text assertion
* use consistent time zone presence/absence in time datetime system test
* Update google/cloud/bigquery/_pandas_helpers.py
* Update google/cloud/bigquery/_pandas_helpers.py
  Co-authored-by: Chalmer Lowe <chalmerlowe@google.com>
* remove pandas-gbq from at least 1 unit test and system test session
---------
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Chalmer Lowe <chalmerlowe@google.com>
1 parent b03a2af commit 7603bd7

File tree

8 files changed

+147
-22
lines changed

8 files changed

+147
-22
lines changed

‎google/cloud/bigquery/_pandas_helpers.py‎

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Shared helper functions for connecting BigQuery and pandas."""
15+
"""Shared helper functions for connecting BigQuery and pandas.
16+
17+
NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
18+
instead. See: go/pandas-gbq-and-bigframes-redundancy and
19+
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py
20+
"""
1621

1722
importconcurrent.futures
1823
fromdatetimeimportdatetime
@@ -40,6 +45,16 @@
4045
else:
4146
importnumpy
4247

48+
49+
try:
50+
importpandas_gbq.schema.pandas_to_bigquery# type: ignore
51+
52+
pandas_gbq_import_exception=None
53+
exceptImportErrorasexc:
54+
pandas_gbq=None
55+
pandas_gbq_import_exception=exc
56+
57+
4358
try:
4459
importdb_dtypes# type: ignore
4560

@@ -445,6 +460,10 @@ def _first_array_valid(series):
445460
defdataframe_to_bq_schema(dataframe,bq_schema):
446461
"""Convert a pandas DataFrame schema to a BigQuery schema.
447462
463+
DEPRECATED: Use
464+
pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(),
465+
instead. See: go/pandas-gbq-and-bigframes-redundancy.
466+
448467
Args:
449468
dataframe (pandas.DataFrame):
450469
DataFrame for which the client determines the BigQuery schema.
@@ -460,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
460479
The automatically determined schema. Returns None if the type of
461480
any column cannot be determined.
462481
"""
482+
ifpandas_gbqisNone:
483+
warnings.warn(
484+
"Loading pandas DataFrame into BigQuery will require pandas-gbq "
485+
"package version 0.26.1 or greater in the future. "
486+
f"Tried to import pandas-gbq and got:{pandas_gbq_import_exception}",
487+
category=FutureWarning,
488+
)
489+
else:
490+
returnpandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(
491+
dataframe,
492+
override_bigquery_fields=bq_schema,
493+
index=True,
494+
)
495+
463496
ifbq_schema:
464497
bq_schema=schema._to_schema_fields(bq_schema)
465498
bq_schema_index= {field.name:fieldforfieldinbq_schema}

‎google/cloud/bigquery/_pyarrow_helpers.py‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Shared helper functions for connecting BigQuery and pyarrow."""
15+
"""Shared helper functions for connecting BigQuery and pyarrow.
16+
17+
NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
18+
instead. See: go/pandas-gbq-and-bigframes-redundancy and
19+
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py
20+
"""
1621

1722
fromtypingimportAny
1823

‎noxfile.py‎

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,14 @@ def default(session, install_extras=True):
110110
else:
111111
install_target="."
112112
session.install("-e",install_target,"-c",constraints_path)
113+
114+
# Test with some broken "extras" in case the user didn't install the extra
115+
# directly. For example, pandas-gbq is recommended for pandas features, but
116+
# we want to test that we fall back to the previous behavior. For context,
117+
# see internal document go/pandas-gbq-and-bigframes-redundancy.
118+
ifsession.python==UNIT_TEST_PYTHON_VERSIONS[0]:
119+
session.run("python","-m","pip","uninstall","pandas-gbq","-y")
120+
113121
session.run("python","-m","pip","freeze")
114122

115123
# Run py.test against the unit tests.
@@ -228,6 +236,13 @@ def system(session):
228236
extras="[all]"
229237
session.install("-e",f".{extras}","-c",constraints_path)
230238

239+
# Test with some broken "extras" in case the user didn't install the extra
240+
# directly. For example, pandas-gbq is recommended for pandas features, but
241+
# we want to test that we fall back to the previous behavior. For context,
242+
# see internal document go/pandas-gbq-and-bigframes-redundancy.
243+
ifsession.python==SYSTEM_TEST_PYTHON_VERSIONS[0]:
244+
session.run("python","-m","pip","uninstall","pandas-gbq","-y")
245+
231246
# print versions of all dependencies
232247
session.run("python","-m","pip","freeze")
233248

‎pyproject.toml‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ bqstorage = [
7474
]
7575
pandas = [
7676
"pandas >= 1.1.0",
77+
"pandas-gbq >= 0.26.1; python_version >= '3.8'",
78+
"grpcio >= 1.47.0, < 2.0dev",
79+
"grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'",
7780
"pyarrow >= 3.0.0",
7881
"db-dtypes >= 0.3.0, < 2.0.0dev",
7982
"importlib_metadata >= 1.0.0; python_version < '3.8'",

‎testing/constraints-3.8.txt‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,11 @@
11
grpcio==1.47.0
22
pandas==1.2.0
3+
4+
# This constraints file is used to check that lower bounds
5+
# are correct in setup.py
6+
#
7+
# Pin the version to the lower bound.
8+
#
9+
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
10+
# Then this file should have foo==1.14.0
11+
pandas-gbq==0.26.1

‎tests/system/test_pandas.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
12591259
df=pandas.DataFrame(
12601260
dict(
12611261
dt=[
1262-
datetime.datetime(2020,1,8,8,0,0),
1262+
datetime.datetime(2020,1,8,8,0,0,tzinfo=datetime.timezone.utc),
12631263
datetime.datetime(
12641264
2020,
12651265
1,

‎tests/unit/test__pandas_helpers.py‎

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@
3535
exceptImportError:
3636
pandas=None
3737

38+
try:
39+
importpandas_gbq.schema.pandas_to_bigquery
40+
exceptImportError:
41+
pandas_gbq=None
42+
3843
try:
3944
importgeopandas
4045
exceptImportError:
@@ -1281,7 +1286,21 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
12811286

12821287

12831288
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
1284-
deftest_dataframe_to_bq_schema_w_named_index(module_under_test):
1289+
@pytest.mark.skipif(pandas_gbqisNone,reason="Requires `pandas-gbq`")
1290+
deftest_dataframe_to_bq_schema_returns_schema_with_pandas_gbq(
1291+
module_under_test,monkeypatch
1292+
):
1293+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1294+
dataframe=pandas.DataFrame({"field00": ["foo","bar"]})
1295+
got=module_under_test.dataframe_to_bq_schema(dataframe, [])
1296+
# Don't assert beyond this, since pandas-gbq is now source of truth.
1297+
assertgotisnotNone
1298+
1299+
1300+
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
1301+
deftest_dataframe_to_bq_schema_w_named_index(module_under_test,monkeypatch):
1302+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1303+
12851304
df_data=collections.OrderedDict(
12861305
[
12871306
("str_column", ["hello","world"]),
@@ -1292,7 +1311,8 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test):
12921311
index=pandas.Index(["a","b"],name="str_index")
12931312
dataframe=pandas.DataFrame(df_data,index=index)
12941313

1295-
returned_schema=module_under_test.dataframe_to_bq_schema(dataframe, [])
1314+
withpytest.warns(FutureWarning,match="pandas-gbq"):
1315+
returned_schema=module_under_test.dataframe_to_bq_schema(dataframe, [])
12961316

12971317
expected_schema= (
12981318
schema.SchemaField("str_index","STRING","NULLABLE"),
@@ -1304,7 +1324,9 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test):
13041324

13051325

13061326
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
1307-
deftest_dataframe_to_bq_schema_w_multiindex(module_under_test):
1327+
deftest_dataframe_to_bq_schema_w_multiindex(module_under_test,monkeypatch):
1328+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1329+
13081330
df_data=collections.OrderedDict(
13091331
[
13101332
("str_column", ["hello","world"]),
@@ -1321,7 +1343,8 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test):
13211343
)
13221344
dataframe=pandas.DataFrame(df_data,index=index)
13231345

1324-
returned_schema=module_under_test.dataframe_to_bq_schema(dataframe, [])
1346+
withpytest.warns(FutureWarning,match="pandas-gbq"):
1347+
returned_schema=module_under_test.dataframe_to_bq_schema(dataframe, [])
13251348

13261349
expected_schema= (
13271350
schema.SchemaField("str_index","STRING","NULLABLE"),
@@ -1335,7 +1358,9 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test):
13351358

13361359

13371360
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
1338-
deftest_dataframe_to_bq_schema_w_bq_schema(module_under_test):
1361+
deftest_dataframe_to_bq_schema_w_bq_schema(module_under_test,monkeypatch):
1362+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1363+
13391364
df_data=collections.OrderedDict(
13401365
[
13411366
("str_column", ["hello","world"]),
@@ -1350,7 +1375,10 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test):
13501375
{"name":"bool_column","type":"BOOL","mode":"REQUIRED"},
13511376
]
13521377

1353-
returned_schema=module_under_test.dataframe_to_bq_schema(dataframe,dict_schema)
1378+
withpytest.warns(FutureWarning,match="pandas-gbq"):
1379+
returned_schema=module_under_test.dataframe_to_bq_schema(
1380+
dataframe,dict_schema
1381+
)
13541382

13551383
expected_schema= (
13561384
schema.SchemaField("str_column","STRING","NULLABLE"),
@@ -1361,7 +1389,11 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test):
13611389

13621390

13631391
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
1364-
deftest_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
1392+
deftest_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
1393+
module_under_test,monkeypatch
1394+
):
1395+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1396+
13651397
dataframe=pandas.DataFrame(
13661398
data=[
13671399
{"id":10,"status":"FOO","execution_date":datetime.date(2019,5,10)},
@@ -1389,7 +1421,11 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
13891421

13901422
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
13911423
@pytest.mark.skipif(isinstance(pyarrow,mock.Mock),reason="Requires `pyarrow`")
1392-
deftest_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
1424+
deftest_dataframe_to_bq_schema_fallback_needed_w_pyarrow(
1425+
module_under_test,monkeypatch
1426+
):
1427+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1428+
13931429
dataframe=pandas.DataFrame(
13941430
data=[
13951431
{"id":10,"status":"FOO","created_at":datetime.date(2019,5,10)},
@@ -1419,7 +1455,9 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
14191455

14201456
@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
14211457
@pytest.mark.skipif(isinstance(pyarrow,mock.Mock),reason="Requires `pyarrow`")
1422-
deftest_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
1458+
deftest_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test,monkeypatch):
1459+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1460+
14231461
dataframe=pandas.DataFrame(
14241462
data=[
14251463
{"struct_field": {"one":2},"status":"FOO"},
@@ -1443,9 +1481,11 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
14431481

14441482

14451483
@pytest.mark.skipif(geopandasisNone,reason="Requires `geopandas`")
1446-
deftest_dataframe_to_bq_schema_geography(module_under_test):
1484+
deftest_dataframe_to_bq_schema_geography(module_under_test,monkeypatch):
14471485
fromshapelyimportwkt
14481486

1487+
monkeypatch.setattr(module_under_test,"pandas_gbq",None)
1488+
14491489
df=geopandas.GeoDataFrame(
14501490
pandas.DataFrame(
14511491
dict(
@@ -1456,7 +1496,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test):
14561496
),
14571497
geometry="geo1",
14581498
)
1459-
bq_schema=module_under_test.dataframe_to_bq_schema(df, [])
1499+
1500+
withpytest.warns(FutureWarning,match="pandas-gbq"):
1501+
bq_schema=module_under_test.dataframe_to_bq_schema(df, [])
1502+
14601503
assertbq_schema== (
14611504
schema.SchemaField("name","STRING"),
14621505
schema.SchemaField("geo1","GEOGRAPHY"),

‎tests/unit/test_client.py‎

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8391,8 +8391,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self):
83918391
autospec=True,
83928392
side_effect=google.api_core.exceptions.NotFound("Table not found"),
83938393
)
8394+
pandas_gbq_patch=mock.patch(
8395+
"google.cloud.bigquery._pandas_helpers.pandas_gbq",
8396+
new=None,
8397+
)
83948398

8395-
withload_patchasload_table_from_file,get_table_patch:
8399+
withload_patchasload_table_from_file,get_table_patch,pandas_gbq_patch:
83968400
withwarnings.catch_warnings(record=True)aswarned:
83978401
client.load_table_from_dataframe(
83988402
dataframe,self.TABLE_REF,location=self.LOCATION
@@ -8448,7 +8452,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
84488452
load_patch=mock.patch(
84498453
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
84508454
)
8451-
84528455
get_table_patch=mock.patch(
84538456
"google.cloud.bigquery.client.Client.get_table",
84548457
autospec=True,
@@ -8460,6 +8463,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
84608463
]
84618464
),
84628465
)
8466+
84638467
withload_patchasload_table_from_file,get_table_patch:
84648468
client.load_table_from_dataframe(
84658469
dataframe,self.TABLE_REF,location=self.LOCATION
@@ -8580,10 +8584,10 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
85808584

85818585
client=self._make_client()
85828586
dataframe=pandas.DataFrame({"x": [1,2,None,4]},dtype="Int64")
8587+
85838588
load_patch=mock.patch(
85848589
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
85858590
)
8586-
85878591
get_table_patch=mock.patch(
85888592
"google.cloud.bigquery.client.Client.get_table",
85898593
autospec=True,
@@ -8612,8 +8616,11 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
86128616

86138617
sent_config=load_table_from_file.mock_calls[0][2]["job_config"]
86148618
assertsent_config.source_format==job.SourceFormat.PARQUET
8615-
asserttuple(sent_config.schema)== (
8616-
SchemaField("x","INT64","NULLABLE",None),
8619+
assert (
8620+
# Accept either the GoogleSQL or legacy SQL type name from pandas-gbq.
8621+
tuple(sent_config.schema)== (SchemaField("x","INT64","NULLABLE",None),)
8622+
ortuple(sent_config.schema)
8623+
== (SchemaField("x","INTEGER","NULLABLE",None),)
86178624
)
86188625

86198626
deftest_load_table_from_dataframe_struct_fields(self):
@@ -8759,14 +8766,22 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
87598766
data=records,columns=["float_column","array_column"]
87608767
)
87618768

8762-
expected_schema= [
8769+
expected_schema_googlesql= [
87638770
SchemaField("float_column","FLOAT"),
87648771
SchemaField(
87658772
"array_column",
87668773
"INT64",
87678774
mode="REPEATED",
87688775
),
87698776
]
8777+
expected_schema_legacy_sql= [
8778+
SchemaField("float_column","FLOAT"),
8779+
SchemaField(
8780+
"array_column",
8781+
"INTEGER",
8782+
mode="REPEATED",
8783+
),
8784+
]
87708785

87718786
load_patch=mock.patch(
87728787
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
@@ -8802,7 +8817,10 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
88028817

88038818
sent_config=load_table_from_file.mock_calls[0][2]["job_config"]
88048819
assertsent_config.source_format==job.SourceFormat.PARQUET
8805-
assertsent_config.schema==expected_schema
8820+
assert (
8821+
sent_config.schema==expected_schema_googlesql
8822+
orsent_config.schema==expected_schema_legacy_sql
8823+
)
88068824

88078825
deftest_load_table_from_dataframe_w_partial_schema(self):
88088826
pandas=pytest.importorskip("pandas")
@@ -8922,7 +8940,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self):
89228940

89238941
load_table_from_file.assert_not_called()
89248942
message=str(exc_context.value)
8925-
assert"bq_schema contains fields not present in dataframe"inmessage
89268943
assert"unknown_col"inmessage
89278944

89288945
deftest_load_table_from_dataframe_w_schema_arrow_custom_compression(self):

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp