Commit 656d2fa

plamut and tswast authored

fix: error inserting DataFrame with REPEATED field (#925)

Co-authored-by: Tim Swast <swast@google.com>

1 parent 8448922, commit 656d2fa (Copy full SHA for 656d2fa)

File tree

2 files changed

+56

-15

lines changed

google/cloud/bigquery
- _pandas_helpers.py
tests/unit
- test__pandas_helpers.py

2 files changed

+56

-15

lines changed

`‎google/cloud/bigquery/_pandas_helpers.py‎`

Lines changed: 7 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -844,7 +844,13 @@ def dataframe_to_json_generator(dataframe):`
`844`	`844`	`output = {}`
`845`	`845`	`for column, value in zip(dataframe.columns, row):`
`846`	`846`	`    # Omit NaN values.`
`847`		`-    if pandas.isna(value):`
	`847`	`+    is_nan = pandas.isna(value)`
	`848`	`+`
	`849`	`+    # isna() can also return an array-like of bools, but the latter's boolean`
	`850`	`+    # value is ambiguous, hence an extra check. An array-like value is not`
	`851`	`+    # considered a NaN, however.`
	`852`	`+    if isinstance(is_nan, bool) and is_nan:`
`848`	`853`	`        continue`
`849`	`854`	`    output[column] = value`
	`855`	`+`
`850`	`856`	`yield output`

`‎tests/unit/test__pandas_helpers.py‎`

Lines changed: 49 additions & 14 deletions

Original file line number	Diff line number	Diff line change
`@@ -821,6 +821,41 @@ def test_dataframe_to_json_generator(module_under_test):`
`821`	`821`	`assertlist(rows)==expected`
`822`	`822`
`823`	`823`
	`824`	`+def test_dataframe_to_json_generator_repeated_field(module_under_test):`
	`825`	`+    pytest.importorskip(`
	`826`	`+        "pandas",`
	`827`	`+        minversion=str(PANDAS_MINIUM_VERSION),`
	`828`	`+        reason=(`
	`829`	+f"Requires `pandas version >={PANDAS_MINIUM_VERSION}` "
	`830`	`+            "which introduces pandas.NA"`
	`831`	`+        ),`
	`832`	`+    )`
	`833`	`+`
	`834`	`+    df_data = [`
	`835`	`+        collections.OrderedDict(`
	`836`	`+            [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")]`
	`837`	`+        ),`
	`838`	`+        collections.OrderedDict(`
	`839`	`+            [`
	`840`	`+                ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]),`
	`841`	`+                ("not_repeated_col", "second"),`
	`842`	`+            ]`
	`843`	`+        ),`
	`844`	`+    ]`
	`845`	`+    dataframe = pandas.DataFrame(df_data)`
	`846`	`+`
	`847`	`+    rows = module_under_test.dataframe_to_json_generator(dataframe)`
	`848`	`+`
	`849`	`+    expected = [`
	`850`	`+        {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"},`
	`851`	`+        {`
	`852`	`+            "repeated_col": ["a", "b", mock.sentinel.foo, "d"],`
	`853`	`+            "not_repeated_col": "second",`
	`854`	`+        },`
	`855`	`+    ]`
	`856`	`+    assert list(rows) == expected`
	`857`	`+`
	`858`	`+`
`824`	`859`	@pytest.mark.skipif(pandasisNone,reason="Requires `pandas`")
`825`	`860`	`deftest_list_columns_and_indexes_with_named_index(module_under_test):`
`826`	`861`	`df_data=collections.OrderedDict(`
`@@ -882,7 +917,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):`
`882`	`917`	`deftest_dataframe_to_bq_schema_dict_sequence(module_under_test):`
`883`	`918`	`df_data=collections.OrderedDict(`
`884`	`919`	`[`
`885`		`- ("str_column", [u"hello",u"world"]),`
	`920`	`+ ("str_column", ["hello","world"]),`
`886`	`921`	`("int_column", [42,8]),`
`887`	`922`	`("bool_column", [True,False]),`
`888`	`923`	`]`
`@@ -1070,7 +1105,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):`
`1070`	`1105`	`]`
`1071`	`1106`
`1072`	`1107`	`dataframe=pandas.DataFrame(`
`1073`		`- {"field01": [u"hello",u"world"],"field02": [True,False]}`
	`1108`	`+ {"field01": ["hello","world"],"field02": [True,False]}`
`1074`	`1109`	`)`
`1075`	`1110`
`1076`	`1111`	`arrow_table=module_under_test.dataframe_to_arrow(dataframe,dict_schema)`
`@@ -1139,8 +1174,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test):`
`1139`	`1174`	`deftest_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):`
`1140`	`1175`	`dataframe=pandas.DataFrame(`
`1141`	`1176`	`data=[`
`1142`		`- {"id":10,"status":u"FOO","execution_date":datetime.date(2019,5,10)},`
`1143`		`- {"id":20,"status":u"BAR","created_at":datetime.date(2018,9,12)},`
	`1177`	`+ {"id":10,"status":"FOO","execution_date":datetime.date(2019,5,10)},`
	`1178`	`+ {"id":20,"status":"BAR","created_at":datetime.date(2018,9,12)},`
`1144`	`1179`	`]`
`1145`	`1180`	`)`
`1146`	`1181`
`@@ -1167,8 +1202,8 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):`
`1167`	`1202`	`deftest_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):`
`1168`	`1203`	`dataframe=pandas.DataFrame(`
`1169`	`1204`	`data=[`
`1170`		`- {"id":10,"status":u"FOO","created_at":datetime.date(2019,5,10)},`
`1171`		`- {"id":20,"status":u"BAR","created_at":datetime.date(2018,9,12)},`
	`1205`	`+ {"id":10,"status":"FOO","created_at":datetime.date(2019,5,10)},`
	`1206`	`+ {"id":20,"status":"BAR","created_at":datetime.date(2018,9,12)},`
`1172`	`1207`	`]`
`1173`	`1208`	`)`
`1174`	`1209`
`@@ -1197,8 +1232,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):`
`1197`	`1232`	`deftest_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):`
`1198`	`1233`	`dataframe=pandas.DataFrame(`
`1199`	`1234`	`data=[`
`1200`		`- {"struct_field": {"one":2},"status":u"FOO"},`
`1201`		`- {"struct_field": {"two":u"222"},"status":u"BAR"},`
	`1235`	`+ {"struct_field": {"one":2},"status":"FOO"},`
	`1236`	`+ {"struct_field": {"two":"222"},"status":"BAR"},`
`1202`	`1237`	`]`
`1203`	`1238`	`)`
`1204`	`1239`
`@@ -1252,7 +1287,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):`
`1252`	`1287`	`"timestamp_field":datetime.datetime(2005,5,31,14,25,55),`
`1253`	`1288`	`"date_field":datetime.date(2005,5,31),`
`1254`	`1289`	`"bytes_field":b"some bytes",`
`1255`		`-"string_field":u"some characters",`
	`1290`	`+"string_field":"some characters",`
`1256`	`1291`	`"numeric_field":decimal.Decimal("123.456"),`
`1257`	`1292`	`"bignumeric_field":decimal.Decimal("{d38}.{d38}".format(d38="9"*38)),`
`1258`	`1293`	`}`
`@@ -1312,13 +1347,13 @@ def test_augment_schema_type_detection_fails(module_under_test):`
`1312`	`1347`	`dataframe=pandas.DataFrame(`
`1313`	`1348`	`data=[`
`1314`	`1349`	`{`
`1315`		`-"status":u"FOO",`
	`1350`	`+"status":"FOO",`
`1316`	`1351`	`"struct_field": {"one":1},`
`1317`		`-"struct_field_2": {"foo":u"123"},`
	`1352`	`+"struct_field_2": {"foo":"123"},`
`1318`	`1353`	`},`
`1319`	`1354`	`{`
`1320`		`-"status":u"BAR",`
`1321`		`-"struct_field": {"two":u"111"},`
	`1355`	`+"status":"BAR",`
	`1356`	`+"struct_field": {"two":"111"},`
`1322`	`1357`	`"struct_field_2": {"bar":27},`
`1323`	`1358`	`},`
`1324`	`1359`	`]`
`@@ -1351,7 +1386,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):`
`1351`	`1386`	`]`
`1352`	`1387`
`1353`	`1388`	`dataframe=pandas.DataFrame(`
`1354`		`- {"field01": [u"hello",u"world"],"field02": [True,False]}`
	`1389`	`+ {"field01": ["hello","world"],"field02": [True,False]}`
`1355`	`1390`	`)`
`1356`	`1391`
`1357`	`1392`	`write_table_patch=mock.patch.object(`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 656d2fa

File tree

2 files changed

2 files changed

`‎google/cloud/bigquery/_pandas_helpers.py‎`

`‎tests/unit/test__pandas_helpers.py‎`

0 commit comments