Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit6249032

Browse files
authored
fix: change load_table_from_json autodetect logic (#1804)
1 parent1298594 commit6249032

File tree

3 files changed

+255
-5
lines changed

3 files changed

+255
-5
lines changed

‎google/cloud/bigquery/client.py‎

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,8 +2833,22 @@ def load_table_from_json(
28332833

28342834
new_job_config.source_format=job.SourceFormat.NEWLINE_DELIMITED_JSON
28352835

2836-
ifnew_job_config.schemaisNone:
2837-
new_job_config.autodetect=True
2836+
# In specific conditions, we check if the table already exists, and/or
2837+
# set the autodetect value for the user. For exact conditions, see table
2838+
# https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
2839+
ifnew_job_config.schemaisNoneandnew_job_config.autodetectisNone:
2840+
ifnew_job_config.write_dispositionin (
2841+
job.WriteDisposition.WRITE_TRUNCATE,
2842+
job.WriteDisposition.WRITE_EMPTY,
2843+
):
2844+
new_job_config.autodetect=True
2845+
else:
2846+
try:
2847+
self.get_table(destination)
2848+
exceptcore_exceptions.NotFound:
2849+
new_job_config.autodetect=True
2850+
else:
2851+
new_job_config.autodetect=False
28382852

28392853
ifprojectisNone:
28402854
project=self.project

‎tests/system/test_client.py‎

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,45 @@ def test_load_table_from_json_schema_autodetect(self):
994994
self.assertEqual(tuple(table.schema),table_schema)
995995
self.assertEqual(table.num_rows,2)
996996

997+
# Autodetect makes best effort to infer the schema, but situations exist
998+
# when the detected schema is wrong, and does not match existing schema.
999+
# Thus the client sets autodetect = False when table exists and just uses
1000+
# the existing schema. This test case uses a special case where the backend has
1001+
# no way to distinguish int from string.
1002+
deftest_load_table_from_json_schema_autodetect_table_exists(self):
1003+
json_rows= [
1004+
{"name":"123","age":18,"birthday":"2001-10-15","is_awesome":False},
1005+
{"name":"456","age":79,"birthday":"1940-03-10","is_awesome":True},
1006+
]
1007+
1008+
dataset_id=_make_dataset_id("bq_system_test")
1009+
self.temp_dataset(dataset_id)
1010+
table_id="{}.{}.load_table_from_json_basic_use".format(
1011+
Config.CLIENT.project,dataset_id
1012+
)
1013+
1014+
# Use schema with NULLABLE fields, because schema autodetection
1015+
# defaults to field mode NULLABLE.
1016+
table_schema= (
1017+
bigquery.SchemaField("name","STRING",mode="NULLABLE"),
1018+
bigquery.SchemaField("age","INTEGER",mode="NULLABLE"),
1019+
bigquery.SchemaField("birthday","DATE",mode="NULLABLE"),
1020+
bigquery.SchemaField("is_awesome","BOOLEAN",mode="NULLABLE"),
1021+
)
1022+
# create the table before loading so that the column order is predictable
1023+
table=helpers.retry_403(Config.CLIENT.create_table)(
1024+
Table(table_id,schema=table_schema)
1025+
)
1026+
self.to_delete.insert(0,table)
1027+
1028+
# do not pass an explicit job config, so that the client itself decides the autodetect value
1029+
load_job=Config.CLIENT.load_table_from_json(json_rows,table_id)
1030+
load_job.result()
1031+
1032+
table=Config.CLIENT.get_table(table)
1033+
self.assertEqual(tuple(table.schema),table_schema)
1034+
self.assertEqual(table.num_rows,2)
1035+
9971036
deftest_load_avro_from_uri_then_dump_table(self):
9981037
fromgoogle.cloud.bigquery.jobimportCreateDisposition
9991038
fromgoogle.cloud.bigquery.jobimportSourceFormat

‎tests/unit/test_client.py‎

Lines changed: 200 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8951,6 +8951,8 @@ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
89518951
SchemaField("x","BIGNUMERIC","NULLABLE",None),
89528952
)
89538953

8954+
# With autodetect specified, we pass the value as is. For more info, see
8955+
# https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
89548956
deftest_load_table_from_json_basic_use(self):
89558957
fromgoogle.cloud.bigquery.clientimport_DEFAULT_NUM_RETRIES
89568958
fromgoogle.cloud.bigqueryimportjob
@@ -8962,12 +8964,28 @@ def test_load_table_from_json_basic_use(self):
89628964
{"name":"Two","age":22,"birthday":"1997-08-09","adult":True},
89638965
]
89648966

8967+
job_config=job.LoadJobConfig(autodetect=True)
8968+
89658969
load_patch=mock.patch(
89668970
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
89678971
)
89688972

8969-
withload_patchasload_table_from_file:
8970-
client.load_table_from_json(json_rows,self.TABLE_REF)
8973+
# mock: remote table already exists
8974+
get_table_reference= {
8975+
"projectId":"project_id",
8976+
"datasetId":"test_dataset",
8977+
"tableId":"test_table",
8978+
}
8979+
get_table_patch=mock.patch(
8980+
"google.cloud.bigquery.client.Client.get_table",
8981+
autospec=True,
8982+
return_value=mock.Mock(table_reference=get_table_reference),
8983+
)
8984+
8985+
withload_patchasload_table_from_file,get_table_patch:
8986+
client.load_table_from_json(
8987+
json_rows,self.TABLE_REF,job_config=job_config
8988+
)
89718989

89728990
load_table_from_file.assert_called_once_with(
89738991
client,
@@ -9066,6 +9084,174 @@ def test_load_table_from_json_w_invalid_job_config(self):
90669084
err_msg=str(exc.value)
90679085
assert"Expected an instance of LoadJobConfig"inerr_msg
90689086

9087+
# When all of the following are true:
9088+
# (1) no schema provided;
9089+
# (2) no autodetect value provided;
9090+
# (3) writeDisposition == WRITE_APPEND or None;
9091+
# (4) table already exists,
9092+
# client sets autodetect == False
9093+
# For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
9094+
deftest_load_table_from_json_wo_schema_wo_autodetect_write_append_w_table(self):
9095+
fromgoogle.cloud.bigquery.clientimport_DEFAULT_NUM_RETRIES
9096+
fromgoogle.cloud.bigqueryimportjob
9097+
fromgoogle.cloud.bigquery.jobimportWriteDisposition
9098+
9099+
client=self._make_client()
9100+
9101+
json_rows= [
9102+
{"name":"One","age":11,"birthday":"2008-09-10","adult":False},
9103+
{"name":"Two","age":22,"birthday":"1997-08-09","adult":True},
9104+
]
9105+
9106+
job_config=job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND)
9107+
9108+
load_patch=mock.patch(
9109+
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
9110+
)
9111+
9112+
# mock: remote table already exists
9113+
get_table_reference= {
9114+
"projectId":"project_id",
9115+
"datasetId":"test_dataset",
9116+
"tableId":"test_table",
9117+
}
9118+
get_table_patch=mock.patch(
9119+
"google.cloud.bigquery.client.Client.get_table",
9120+
autospec=True,
9121+
return_value=mock.Mock(table_reference=get_table_reference),
9122+
)
9123+
9124+
withload_patchasload_table_from_file,get_table_patch:
9125+
client.load_table_from_json(
9126+
json_rows,self.TABLE_REF,job_config=job_config
9127+
)
9128+
9129+
load_table_from_file.assert_called_once_with(
9130+
client,
9131+
mock.ANY,
9132+
self.TABLE_REF,
9133+
size=mock.ANY,
9134+
num_retries=_DEFAULT_NUM_RETRIES,
9135+
job_id=mock.ANY,
9136+
job_id_prefix=None,
9137+
location=client.location,
9138+
project=client.project,
9139+
job_config=mock.ANY,
9140+
timeout=DEFAULT_TIMEOUT,
9141+
)
9142+
9143+
sent_config=load_table_from_file.mock_calls[0][2]["job_config"]
9144+
assertsent_config.source_format==job.SourceFormat.NEWLINE_DELIMITED_JSON
9145+
assertsent_config.schemaisNone
9146+
assertnotsent_config.autodetect
9147+
9148+
# When all of the following are true:
9149+
# (1) no schema provided;
9150+
# (2) no autodetect value provided;
9151+
# (3) writeDisposition == WRITE_APPEND or None;
9152+
# (4) table does NOT exist,
9153+
# client sets autodetect == True
9154+
# For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
9155+
deftest_load_table_from_json_wo_schema_wo_autodetect_write_append_wo_table(self):
9156+
importgoogle.api_core.exceptionsascore_exceptions
9157+
fromgoogle.cloud.bigquery.clientimport_DEFAULT_NUM_RETRIES
9158+
fromgoogle.cloud.bigqueryimportjob
9159+
fromgoogle.cloud.bigquery.jobimportWriteDisposition
9160+
9161+
client=self._make_client()
9162+
9163+
json_rows= [
9164+
{"name":"One","age":11,"birthday":"2008-09-10","adult":False},
9165+
{"name":"Two","age":22,"birthday":"1997-08-09","adult":True},
9166+
]
9167+
9168+
job_config=job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND)
9169+
9170+
load_patch=mock.patch(
9171+
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
9172+
)
9173+
9174+
# mock: remote table doesn't exist
9175+
get_table_patch=mock.patch(
9176+
"google.cloud.bigquery.client.Client.get_table",
9177+
autospec=True,
9178+
side_effect=core_exceptions.NotFound(""),
9179+
)
9180+
9181+
withload_patchasload_table_from_file,get_table_patch:
9182+
client.load_table_from_json(
9183+
json_rows,self.TABLE_REF,job_config=job_config
9184+
)
9185+
9186+
load_table_from_file.assert_called_once_with(
9187+
client,
9188+
mock.ANY,
9189+
self.TABLE_REF,
9190+
size=mock.ANY,
9191+
num_retries=_DEFAULT_NUM_RETRIES,
9192+
job_id=mock.ANY,
9193+
job_id_prefix=None,
9194+
location=client.location,
9195+
project=client.project,
9196+
job_config=mock.ANY,
9197+
timeout=DEFAULT_TIMEOUT,
9198+
)
9199+
9200+
sent_config=load_table_from_file.mock_calls[0][2]["job_config"]
9201+
assertsent_config.source_format==job.SourceFormat.NEWLINE_DELIMITED_JSON
9202+
assertsent_config.schemaisNone
9203+
assertsent_config.autodetect
9204+
9205+
# When all of the following are true:
9206+
# (1) no schema provided;
9207+
# (2) no autodetect value provided;
9208+
# (3) writeDisposition == WRITE_TRUNCATE or WRITE_EMPTY;
9209+
# client sets autodetect == True
9210+
# For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
9211+
deftest_load_table_from_json_wo_schema_wo_autodetect_others(self):
9212+
fromgoogle.cloud.bigquery.clientimport_DEFAULT_NUM_RETRIES
9213+
fromgoogle.cloud.bigqueryimportjob
9214+
fromgoogle.cloud.bigquery.jobimportWriteDisposition
9215+
9216+
client=self._make_client()
9217+
9218+
json_rows= [
9219+
{"name":"One","age":11,"birthday":"2008-09-10","adult":False},
9220+
{"name":"Two","age":22,"birthday":"1997-08-09","adult":True},
9221+
]
9222+
9223+
job_config=job.LoadJobConfig(
9224+
write_disposition=WriteDisposition.WRITE_TRUNCATE
9225+
)
9226+
9227+
load_patch=mock.patch(
9228+
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
9229+
)
9230+
9231+
withload_patchasload_table_from_file:
9232+
client.load_table_from_json(
9233+
json_rows,self.TABLE_REF,job_config=job_config
9234+
)
9235+
9236+
load_table_from_file.assert_called_once_with(
9237+
client,
9238+
mock.ANY,
9239+
self.TABLE_REF,
9240+
size=mock.ANY,
9241+
num_retries=_DEFAULT_NUM_RETRIES,
9242+
job_id=mock.ANY,
9243+
job_id_prefix=None,
9244+
location=client.location,
9245+
project=client.project,
9246+
job_config=mock.ANY,
9247+
timeout=DEFAULT_TIMEOUT,
9248+
)
9249+
9250+
sent_config=load_table_from_file.mock_calls[0][2]["job_config"]
9251+
assertsent_config.source_format==job.SourceFormat.NEWLINE_DELIMITED_JSON
9252+
assertsent_config.schemaisNone
9253+
assertsent_config.autodetect
9254+
90699255
deftest_load_table_from_json_w_explicit_job_config_override(self):
90709256
fromgoogle.cloud.bigqueryimportjob
90719257
fromgoogle.cloud.bigquery.clientimport_DEFAULT_NUM_RETRIES
@@ -9190,8 +9376,19 @@ def test_load_table_from_json_unicode_emoji_data_case(self):
91909376
load_patch=mock.patch(
91919377
"google.cloud.bigquery.client.Client.load_table_from_file",autospec=True
91929378
)
9379+
# mock: remote table already exists
9380+
get_table_reference= {
9381+
"projectId":"project_id",
9382+
"datasetId":"test_dataset",
9383+
"tableId":"test_table",
9384+
}
9385+
get_table_patch=mock.patch(
9386+
"google.cloud.bigquery.client.Client.get_table",
9387+
autospec=True,
9388+
return_value=mock.Mock(table_reference=get_table_reference),
9389+
)
91939390

9194-
withload_patchasload_table_from_file:
9391+
withload_patchasload_table_from_file,get_table_patch:
91959392
client.load_table_from_json(json_rows,self.TABLE_REF)
91969393

91979394
load_table_from_file.assert_called_once_with(

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp