Notifications: You must be signed in to change notification settings
Fork 321
Star 786

Commit 0f3a341

committed

feat: add default LoadJobConfig to Client

1 parent aa0fa02 · commit 0f3a341 · Copy full SHA for 0f3a341

File tree

5 files changed

+621

-56

lines changed

google/cloud/bigquery
- client.py
- job
  - base.py
tests
- system
  - test_client.py
- unit
  - job
    - test_base.py
  - test_client.py

5 files changed

+621

-56

lines changed

`‎google/cloud/bigquery/client.py‎`

Lines changed: 71 additions & 50 deletions

Original file line number	Diff line number	Diff line change
`@@ -210,6 +210,9 @@ class Client(ClientWithProject):`
`210`	`210`	`default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):`
`211`	`211`	Default ``QueryJobConfig``.
`212`	`212`	Will be merged into job configs passed into the ``query`` method.
	`213`	`+ default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):`
	`214`	+ Default ``LoadJobConfig``.
	`215`	+ Will be merged into job configs passed into the ``load_table_*`` methods.
`213`	`216`	`client_info (Optional[google.api_core.client_info.ClientInfo]):`
`214`	`217`	`The client info used to send a user-agent string along with API`
`215`	`218`	requests. If ``None``, then default info will be used. Generally,
`@@ -235,6 +238,7 @@ def __init__(`
`235`	`238`	`_http=None,`
`236`	`239`	`location=None,`
`237`	`240`	`default_query_job_config=None,`
	`241`	`+default_load_job_config=None,`
`238`	`242`	`client_info=None,`
`239`	`243`	`client_options=None,`
`240`	`244`	`)->None:`
`@@ -260,6 +264,7 @@ def __init__(`
`260`	`264`	`self._connection=Connection(self,**kw_args)`
`261`	`265`	`self._location=location`
`262`	`266`	`self._default_query_job_config=copy.deepcopy(default_query_job_config)`
	`267`	`+self._default_load_job_config=copy.deepcopy(default_load_job_config)`
`263`	`268`
`264`	`269`	`@property`
`265`	`270`	`deflocation(self):`
`@@ -277,6 +282,17 @@ def default_query_job_config(self):`
`277`	`282`	`defdefault_query_job_config(self,value:QueryJobConfig):`
`278`	`283`	`self._default_query_job_config=copy.deepcopy(value)`
`279`	`284`
	`285`	`+@property`
	`286`	`+def default_load_job_config(self):`
	`287`	+"""Default ``LoadJobConfig``.
	`288`	+ Will be merged into job configs passed into the ``load_table_*`` methods.
	`289`	`+ """`
	`290`	`+return self._default_load_job_config`
	`291`	`+`
	`292`	`+@default_load_job_config.setter`
	`293`	`+def default_load_job_config(self, value: LoadJobConfig):`
	`294`	`+self._default_load_job_config = copy.deepcopy(value)`
	`295`	`+`
`280`	`296`	`defclose(self):`
`281`	`297`	`"""Close the underlying transport objects, releasing system resources.`
`282`	`298`
`@@ -2330,8 +2346,8 @@ def load_table_from_uri(`
`2330`	`2346`
`2331`	`2347`	`Raises:`
`2332`	`2348`	`TypeError:`
`2333`		- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
`2334`		`- class.`
	`2349`	+ If ``job_config`` is not an instance of
	`2350`	+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
`2335`	`2351`	`"""`
`2336`	`2352`	`job_id=_make_job_id(job_id,job_id_prefix)`
`2337`	`2353`
`@@ -2348,11 +2364,14 @@ def load_table_from_uri(`
`2348`	`2364`
`2349`	`2365`	`destination=_table_arg_to_table_ref(destination,default_project=self.project)`
`2350`	`2366`
`2351`		`-ifjob_config:`
`2352`		`-job_config=copy.deepcopy(job_config)`
`2353`		`-_verify_job_config_type(job_config,google.cloud.bigquery.job.LoadJobConfig)`
	`2367`	`+ifjob_configisnotNone:`
	`2368`	`+_verify_job_config_type(job_config,LoadJobConfig)`
	`2369`	`+else:`
	`2370`	`+job_config=job.LoadJobConfig()`
`2354`	`2371`
`2355`		`-load_job=job.LoadJob(job_ref,source_uris,destination,self,job_config)`
	`2372`	`+new_job_config=job_config._fill_from_default(self._default_load_job_config)`
	`2373`	`+`
	`2374`	`+load_job=job.LoadJob(job_ref,source_uris,destination,self,new_job_config)`
`2356`	`2375`	`load_job._begin(retry=retry,timeout=timeout)`
`2357`	`2376`
`2358`	`2377`	`returnload_job`
`@@ -2424,8 +2443,8 @@ def load_table_from_file(`
`2424`	`2443`	`mode.`
`2425`	`2444`
`2426`	`2445`	`TypeError:`
`2427`		- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
`2428`		`- class.`
	`2446`	+ If ``job_config`` is not an instance of
	`2447`	+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
`2429`	`2448`	`"""`
`2430`	`2449`	`job_id=_make_job_id(job_id,job_id_prefix)`
`2431`	`2450`
`@@ -2437,10 +2456,15 @@ def load_table_from_file(`
`2437`	`2456`
`2438`	`2457`	`destination=_table_arg_to_table_ref(destination,default_project=self.project)`
`2439`	`2458`	`job_ref=job._JobReference(job_id,project=project,location=location)`
`2440`		`-ifjob_config:`
`2441`		`-job_config=copy.deepcopy(job_config)`
`2442`		`-_verify_job_config_type(job_config,google.cloud.bigquery.job.LoadJobConfig)`
`2443`		`-load_job=job.LoadJob(job_ref,None,destination,self,job_config)`
	`2459`	`+`
	`2460`	`+ifjob_configisnotNone:`
	`2461`	`+_verify_job_config_type(job_config,LoadJobConfig)`
	`2462`	`+else:`
	`2463`	`+job_config=job.LoadJobConfig()`
	`2464`	`+`
	`2465`	`+new_job_config=job_config._fill_from_default(self._default_load_job_config)`
	`2466`	`+`
	`2467`	`+load_job=job.LoadJob(job_ref,None,destination,self,new_job_config)`
`2444`	`2468`	`job_resource=load_job.to_api_repr()`
`2445`	`2469`
`2446`	`2470`	`ifrewind:`
`@@ -2564,43 +2588,40 @@ def load_table_from_dataframe(`
`2564`	`2588`	`If a usable parquet engine cannot be found. This method`
`2565`	`2589`	requires :mod:`pyarrow` to be installed.
`2566`	`2590`	`TypeError:`
`2567`		- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
`2568`		`- class.`
	`2591`	+ If ``job_config`` is not an instance of
	`2592`	+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
`2569`	`2593`	`"""`
`2570`	`2594`	`job_id=_make_job_id(job_id,job_id_prefix)`
`2571`	`2595`
`2572`		`-ifjob_config:`
`2573`		`-_verify_job_config_type(job_config,google.cloud.bigquery.job.LoadJobConfig)`
`2574`		`-# Make a copy so that the job config isn't modified in-place.`
`2575`		`-job_config_properties=copy.deepcopy(job_config._properties)`
`2576`		`-job_config=job.LoadJobConfig()`
`2577`		`-job_config._properties=job_config_properties`
`2578`		`-`
	`2596`	`+ifjob_configisnotNone:`
	`2597`	`+_verify_job_config_type(job_config,LoadJobConfig)`
`2579`	`2598`	`else:`
`2580`	`2599`	`job_config=job.LoadJobConfig()`
`2581`	`2600`
	`2601`	`+new_job_config=job_config._fill_from_default(self._default_load_job_config)`
	`2602`	`+`
`2582`	`2603`	`supported_formats= {job.SourceFormat.CSV,job.SourceFormat.PARQUET}`
`2583`		`-ifjob_config.source_formatisNone:`
	`2604`	`+ifnew_job_config.source_formatisNone:`
`2584`	`2605`	`# default value`
`2585`		`-job_config.source_format=job.SourceFormat.PARQUET`
	`2606`	`+new_job_config.source_format=job.SourceFormat.PARQUET`
`2586`	`2607`
`2587`	`2608`	`if (`
`2588`		`-job_config.source_format==job.SourceFormat.PARQUET`
`2589`		`-andjob_config.parquet_optionsisNone`
	`2609`	`+new_job_config.source_format==job.SourceFormat.PARQUET`
	`2610`	`+andnew_job_config.parquet_optionsisNone`
`2590`	`2611`	`):`
`2591`	`2612`	`parquet_options=ParquetOptions()`
`2592`	`2613`	`# default value`
`2593`	`2614`	`parquet_options.enable_list_inference=True`
`2594`		`-job_config.parquet_options=parquet_options`
	`2615`	`+new_job_config.parquet_options=parquet_options`
`2595`	`2616`
`2596`		`-ifjob_config.source_formatnotinsupported_formats:`
	`2617`	`+ifnew_job_config.source_formatnotinsupported_formats:`
`2597`	`2618`	`raiseValueError(`
`2598`	`2619`	`"Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format(`
`2599`		`-job_config.source_format`
	`2620`	`+new_job_config.source_format`
`2600`	`2621`	`)`
`2601`	`2622`	`)`
`2602`	`2623`
`2603`		`-ifpyarrowisNoneandjob_config.source_format==job.SourceFormat.PARQUET:`
	`2624`	`+ifpyarrowisNoneandnew_job_config.source_format==job.SourceFormat.PARQUET:`
`2604`	`2625`	`# pyarrow is now the only supported parquet engine.`
`2605`	`2626`	`raiseValueError("This method requires pyarrow to be installed")`
`2606`	`2627`
`@@ -2611,8 +2632,8 @@ def load_table_from_dataframe(`
`2611`	`2632`	`# schema, and check if dataframe schema is compatible with it - except`
`2612`	`2633`	`# for WRITE_TRUNCATE jobs, the existing schema does not matter then.`
`2613`	`2634`	`if (`
`2614`		`-notjob_config.schema`
`2615`		`-andjob_config.write_disposition!=job.WriteDisposition.WRITE_TRUNCATE`
	`2635`	`+notnew_job_config.schema`
	`2636`	`+andnew_job_config.write_disposition!=job.WriteDisposition.WRITE_TRUNCATE`
`2616`	`2637`	`):`
`2617`	`2638`	`try:`
`2618`	`2639`	`table=self.get_table(destination)`
`@@ -2623,7 +2644,7 @@ def load_table_from_dataframe(`
`2623`	`2644`	`name`
`2624`	`2645`	`forname,_in_pandas_helpers.list_columns_and_indexes(dataframe)`
`2625`	`2646`	`)`
`2626`		`-job_config.schema= [`
	`2647`	`+new_job_config.schema= [`
`2627`	`2648`	`# Field description and policy tags are not needed to`
`2628`	`2649`	`# serialize a data frame.`
`2629`	`2650`	`SchemaField(`
`@@ -2637,11 +2658,11 @@ def load_table_from_dataframe(`
`2637`	`2658`	`iffield.nameincolumns_and_indexes`
`2638`	`2659`	`]`
`2639`	`2660`
`2640`		`-job_config.schema=_pandas_helpers.dataframe_to_bq_schema(`
`2641`		`-dataframe,job_config.schema`
	`2661`	`+new_job_config.schema=_pandas_helpers.dataframe_to_bq_schema(`
	`2662`	`+dataframe,new_job_config.schema`
`2642`	`2663`	`)`
`2643`	`2664`
`2644`		`-ifnotjob_config.schema:`
	`2665`	`+ifnotnew_job_config.schema:`
`2645`	`2666`	`# the schema could not be fully detected`
`2646`	`2667`	`warnings.warn(`
`2647`	`2668`	`"Schema could not be detected for all columns. Loading from a "`
`@@ -2652,13 +2673,13 @@ def load_table_from_dataframe(`
`2652`	`2673`	`)`
`2653`	`2674`
`2654`	`2675`	`tmpfd,tmppath=tempfile.mkstemp(`
`2655`		`-suffix="_job_{}.{}".format(job_id[:8],job_config.source_format.lower())`
	`2676`	`+suffix="_job_{}.{}".format(job_id[:8],new_job_config.source_format.lower())`
`2656`	`2677`	`)`
`2657`	`2678`	`os.close(tmpfd)`
`2658`	`2679`
`2659`	`2680`	`try:`
`2660`	`2681`
`2661`		`-ifjob_config.source_format==job.SourceFormat.PARQUET:`
	`2682`	`+ifnew_job_config.source_format==job.SourceFormat.PARQUET:`
`2662`	`2683`	`if_PYARROW_VERSIONin_PYARROW_BAD_VERSIONS:`
`2663`	`2684`	`msg= (`
`2664`	`2685`	`"Loading dataframe data in PARQUET format with pyarrow "`
`@@ -2669,13 +2690,13 @@ def load_table_from_dataframe(`
`2669`	`2690`	`)`
`2670`	`2691`	`warnings.warn(msg,category=RuntimeWarning)`
`2671`	`2692`
`2672`		`-ifjob_config.schema:`
	`2693`	`+ifnew_job_config.schema:`
`2673`	`2694`	`ifparquet_compression=="snappy":# adjust the default value`
`2674`	`2695`	`parquet_compression=parquet_compression.upper()`
`2675`	`2696`
`2676`	`2697`	`_pandas_helpers.dataframe_to_parquet(`
`2677`	`2698`	`dataframe,`
`2678`		`-job_config.schema,`
	`2699`	`+new_job_config.schema,`
`2679`	`2700`	`tmppath,`
`2680`	`2701`	`parquet_compression=parquet_compression,`
`2681`	`2702`	`parquet_use_compliant_nested_type=True,`
`@@ -2715,7 +2736,7 @@ def load_table_from_dataframe(`
`2715`	`2736`	`job_id_prefix=job_id_prefix,`
`2716`	`2737`	`location=location,`
`2717`	`2738`	`project=project,`
`2718`		`-job_config=job_config,`
	`2739`	`+job_config=new_job_config,`
`2719`	`2740`	`timeout=timeout,`
`2720`	`2741`	`)`
`2721`	`2742`
`@@ -2791,22 +2812,22 @@ def load_table_from_json(`
`2791`	`2812`
`2792`	`2813`	`Raises:`
`2793`	`2814`	`TypeError:`
`2794`		- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
`2795`		`- class.`
	`2815`	+ If ``job_config`` is not an instance of
	`2816`	+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
`2796`	`2817`	`"""`
`2797`	`2818`	`job_id=_make_job_id(job_id,job_id_prefix)`
`2798`	`2819`
`2799`		`-ifjob_config:`
`2800`		`-_verify_job_config_type(job_config,google.cloud.bigquery.job.LoadJobConfig)`
`2801`		`-# Make a copy so that the job config isn't modified in-place.`
`2802`		`-job_config=copy.deepcopy(job_config)`
	`2820`	`+ifjob_configisnotNone:`
	`2821`	`+_verify_job_config_type(job_config,LoadJobConfig)`
`2803`	`2822`	`else:`
`2804`	`2823`	`job_config=job.LoadJobConfig()`
`2805`	`2824`
`2806`		`-job_config.source_format=job.SourceFormat.NEWLINE_DELIMITED_JSON`
	`2825`	`+new_job_config=job_config._fill_from_default(self._default_load_job_config)`
	`2826`	`+`
	`2827`	`+new_job_config.source_format=job.SourceFormat.NEWLINE_DELIMITED_JSON`
`2807`	`2828`
`2808`		`-ifjob_config.schemaisNone:`
`2809`		`-job_config.autodetect=True`
	`2829`	`+ifnew_job_config.schemaisNone:`
	`2830`	`+new_job_config.autodetect=True`
`2810`	`2831`
`2811`	`2832`	`ifprojectisNone:`
`2812`	`2833`	`project=self.project`
`@@ -2828,7 +2849,7 @@ def load_table_from_json(`
`2828`	`2849`	`job_id_prefix=job_id_prefix,`
`2829`	`2850`	`location=location,`
`2830`	`2851`	`project=project,`
`2831`		`-job_config=job_config,`
	`2852`	`+job_config=new_job_config,`
`2832`	`2853`	`timeout=timeout,`
`2833`	`2854`	`)`
`2834`	`2855`

`‎google/cloud/bigquery/job/base.py‎`

Lines changed: 5 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -269,7 +269,7 @@ def to_api_repr(self) -> dict:`
`269`	`269`	`"""`
`270`	`270`	`returncopy.deepcopy(self._properties)`
`271`	`271`
`272`		`-def_fill_from_default(self,default_job_config):`
	`272`	`+def_fill_from_default(self,default_job_config=None):`
`273`	`273`	`"""Merge this job config with a default job config.`
`274`	`274`
`275`	`275`	`The keys in this object take precedence over the keys in the default`
`@@ -283,6 +283,10 @@ def _fill_from_default(self, default_job_config):`
`283`	`283`	`Returns:`
`284`	`284`	`google.cloud.bigquery.job._JobConfig: A new (merged) job config.`
`285`	`285`	`"""`
	`286`	`+if not default_job_config:`
	`287`	`+new_job_config = copy.deepcopy(self)`
	`288`	`+return new_job_config`
	`289`	`+`
`286`	`290`	`ifself._job_type!=default_job_config._job_type:`
`287`	`291`	`raiseTypeError(`
`288`	`292`	`"attempted to merge two incompatible job types: "`

`‎tests/system/test_client.py‎`

Lines changed: 4 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -2319,7 +2319,7 @@ def _table_exists(t):`
`2319`	`2319`	`returnFalse`
`2320`	`2320`
`2321`	`2321`
`2322`		`-deftest_dbapi_create_view(dataset_id):`
	`2322`	`+deftest_dbapi_create_view(dataset_id:str):`
`2323`	`2323`
`2324`	`2324`	`query=f"""`
`2325`	`2325`	`CREATE VIEW{dataset_id}.dbapi_create_view`
`@@ -2332,7 +2332,7 @@ def test_dbapi_create_view(dataset_id):`
`2332`	`2332`	`assertConfig.CURSOR.rowcount==0,"expected 0 rows"`
`2333`	`2333`
`2334`	`2334`
`2335`		`-deftest_parameterized_types_round_trip(dataset_id):`
	`2335`	`+deftest_parameterized_types_round_trip(dataset_id:str):`
`2336`	`2336`	`client=Config.CLIENT`
`2337`	`2337`	`table_id=f"{dataset_id}.test_parameterized_types_round_trip"`
`2338`	`2338`	`fields= (`
`@@ -2358,7 +2358,7 @@ def test_parameterized_types_round_trip(dataset_id):`
`2358`	`2358`	`asserttuple(s._key()[:2]forsintable2.schema)==fields`
`2359`	`2359`
`2360`	`2360`
`2361`		`-deftest_table_snapshots(dataset_id):`
	`2361`	`+deftest_table_snapshots(dataset_id:str):`
`2362`	`2362`	`fromgoogle.cloud.bigqueryimportCopyJobConfig`
`2363`	`2363`	`fromgoogle.cloud.bigqueryimportOperationType`
`2364`	`2364`
`@@ -2429,7 +2429,7 @@ def test_table_snapshots(dataset_id):`
`2429`	`2429`	`assertrows== [(1,"one"), (2,"two")]`
`2430`	`2430`
`2431`	`2431`
`2432`		`-deftest_table_clones(dataset_id):`
	`2432`	`+deftest_table_clones(dataset_id:str):`
`2433`	`2433`	`fromgoogle.cloud.bigqueryimportCopyJobConfig`
`2434`	`2434`	`fromgoogle.cloud.bigqueryimportOperationType`
`2435`	`2435`

`‎tests/unit/job/test_base.py‎`

Lines changed: 28 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1104,7 +1104,7 @@ def test_ctor_with_unknown_property_raises_error(self):`
`1104`	`1104`	`config=self._make_one()`
`1105`	`1105`	`config.wrong_name=None`
`1106`	`1106`
`1107`		`-deftest_fill_from_default(self):`
	`1107`	`+deftest_fill_query_job_config_from_default(self):`
`1108`	`1108`	`fromgoogle.cloud.bigqueryimportQueryJobConfig`
`1109`	`1109`
`1110`	`1110`	`job_config=QueryJobConfig()`
`@@ -1120,6 +1120,22 @@ def test_fill_from_default(self):`
`1120`	`1120`	`self.assertTrue(final_job_config.use_query_cache)`
`1121`	`1121`	`self.assertEqual(final_job_config.maximum_bytes_billed,1000)`
`1122`	`1122`
	`1123`	`+deftest_fill_load_job_from_default(self):`
	`1124`	`+fromgoogle.cloud.bigqueryimportLoadJobConfig`
	`1125`	`+`
	`1126`	`+job_config=LoadJobConfig()`
	`1127`	`+job_config.create_session=True`
	`1128`	`+job_config.encoding="UTF-8"`
	`1129`	`+`
	`1130`	`+default_job_config=LoadJobConfig()`
	`1131`	`+default_job_config.ignore_unknown_values=True`
	`1132`	`+default_job_config.encoding="ISO-8859-1"`
	`1133`	`+`
	`1134`	`+final_job_config=job_config._fill_from_default(default_job_config)`
	`1135`	`+self.assertTrue(final_job_config.create_session)`
	`1136`	`+self.assertTrue(final_job_config.ignore_unknown_values)`
	`1137`	`+self.assertEqual(final_job_config.encoding,"UTF-8")`
	`1138`	`+`
`1123`	`1139`	`deftest_fill_from_default_conflict(self):`
`1124`	`1140`	`fromgoogle.cloud.bigqueryimportQueryJobConfig`
`1125`	`1141`
`@@ -1132,6 +1148,17 @@ def test_fill_from_default_conflict(self):`
`1132`	`1148`	`withself.assertRaises(TypeError):`
`1133`	`1149`	`basic_job_config._fill_from_default(conflicting_job_config)`
`1134`	`1150`
	`1151`	`+deftest_fill_from_empty_default_conflict(self):`
	`1152`	`+fromgoogle.cloud.bigqueryimportQueryJobConfig`
	`1153`	`+`
	`1154`	`+job_config=QueryJobConfig()`
	`1155`	`+job_config.dry_run=True`
	`1156`	`+job_config.maximum_bytes_billed=1000`
	`1157`	`+`
	`1158`	`+final_job_config=job_config._fill_from_default(default_job_config=None)`
	`1159`	`+self.assertTrue(final_job_config.dry_run)`
	`1160`	`+self.assertEqual(final_job_config.maximum_bytes_billed,1000)`
	`1161`	`+`
`1135`	`1162`	`@mock.patch("google.cloud.bigquery._helpers._get_sub_prop")`
`1136`	`1163`	`deftest__get_sub_prop_wo_default(self,_get_sub_prop):`
`1137`	`1164`	`job_config=self._make_one()`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 0f3a341

File tree

5 files changed

5 files changed

`‎google/cloud/bigquery/client.py‎`

`‎google/cloud/bigquery/job/base.py‎`

`‎tests/system/test_client.py‎`

`‎tests/unit/job/test_base.py‎`

0 commit comments