Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 864383b

Browse files
author
Jim Fulton
authored
fix: Arrow extension-type metadata was not set when calling the REST API or when there are no rows (#946)
1 parent 1a6ab12 commit 864383b

File tree

5 files changed

+118
-3
lines changed

5 files changed

+118
-3
lines changed

‎google/cloud/bigquery/_pandas_helpers.py‎

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,13 @@ def pyarrow_timestamp():
173173
    pyarrow.decimal128(38, scale=9).id: "NUMERIC",
174174
    pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
175175
}
176+
BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = {
177+
    "GEOGRAPHY": {
178+
        b"ARROW:extension:name": b"google:sqlType:geography",
179+
        b"ARROW:extension:metadata": b'{"encoding": "WKT"}',
180+
    },
181+
    "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"},
182+
}
176183

177184
else:  # pragma: NO COVER
178185
    BQ_TO_ARROW_SCALARS = {}  # pragma: NO COVER
@@ -227,7 +234,12 @@ def bq_to_arrow_field(bq_field, array_type=None):
227234
        if array_type is not None:
228235
            arrow_type = array_type  # For GEOGRAPHY, at least initially
229236
        is_nullable = bq_field.mode.upper() == "NULLABLE"
230-
        return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable)
237+
        metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get(
238+
            bq_field.field_type.upper() if bq_field.field_type else ""
239+
        )
240+
        return pyarrow.field(
241+
            bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata
242+
        )
231243

232244
    warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name))
233245
    return None

‎google/cloud/bigquery/table.py‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1810,10 +1810,14 @@ def to_arrow(
18101810
            if owns_bqstorage_client:
18111811
                bqstorage_client._transport.grpc_channel.close()
18121812

1813-
        if record_batches:
1813+
        if record_batches and bqstorage_client is not None:
18141814
            return pyarrow.Table.from_batches(record_batches)
18151815
        else:
1816-
            # No records, use schema based on BigQuery schema.
1816+
            # No records (not record_batches), use schema based on BigQuery schema
1817+
            # **or**
1818+
            # we used the REST API (bqstorage_client is None),
1819+
            # which doesn't add arrow extension metadata, so we let
1820+
            # `bq_to_arrow_schema` do it.
18171821
            arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema)
18181822
            return pyarrow.Table.from_batches(record_batches, schema=arrow_schema)
18191823

‎tests/system/conftest.py‎

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import pathlib
16+
import re
1617

1718
import pytest
1819
import test_utils.prefixer
@@ -61,6 +62,17 @@ def dataset_id(bigquery_client):
6162
    bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
6263

6364

65+
@pytest.fixture()
66+
def dataset_client(bigquery_client, dataset_id):
67+
    import google.cloud.bigquery.job
68+
69+
    return bigquery.Client(
70+
        default_query_job_config=google.cloud.bigquery.job.QueryJobConfig(
71+
            default_dataset=f"{bigquery_client.project}.{dataset_id}",
72+
        )
73+
    )
74+
75+
6476
@pytest.fixture
6577
def table_id(dataset_id):
6678
    return f"{dataset_id}.table_{helpers.temp_suffix()}"
@@ -98,3 +110,8 @@ def scalars_extreme_table(
98110
    job.result()
99111
    yield full_table_id
100112
    bigquery_client.delete_table(full_table_id)
113+
114+
115+
@pytest.fixture
116+
def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub):
117+
    return replace_non_anum("_", request.node.name)

‎tests/system/test_arrow.py‎

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,62 @@ def test_list_rows_nullable_scalars_dtypes(
110110
    timestamp_type = schema.field("timestamp_col").type
111111
    assert timestamp_type.unit == "us"
112112
    assert timestamp_type.tz is not None
113+
114+
115+
@pytest.mark.parametrize("do_insert", [True, False])
116+
def test_arrow_extension_types_same_for_storage_and_REST_APIs_894(
117+
    dataset_client, test_table_name, do_insert
118+
):
119+
    types = dict(
120+
        astring=("STRING", "'x'"),
121+
        astring9=("STRING(9)", "'x'"),
122+
        abytes=("BYTES", "b'x'"),
123+
        abytes9=("BYTES(9)", "b'x'"),
124+
        anumeric=("NUMERIC", "42"),
125+
        anumeric9=("NUMERIC(9)", "42"),
126+
        anumeric92=("NUMERIC(9, 2)", "42"),
127+
        abignumeric=("BIGNUMERIC", "42e30"),
128+
        abignumeric49=("BIGNUMERIC(37)", "42e30"),
129+
        abignumeric492=("BIGNUMERIC(37, 2)", "42e30"),
130+
        abool=("BOOL", "true"),
131+
        adate=("DATE", "'2021-09-06'"),
132+
        adatetime=("DATETIME", "'2021-09-06T09:57:26'"),
133+
        ageography=("GEOGRAPHY", "ST_GEOGFROMTEXT('point(0 0)')"),
134+
        # Can't get arrow data for interval :(
135+
        # ainterval=('INTERVAL', "make_interval(1, 2, 3, 4, 5, 6)"),
136+
        aint64=("INT64", "42"),
137+
        afloat64=("FLOAT64", "42.0"),
138+
        astruct=("STRUCT<v int64>", "struct(42)"),
139+
        atime=("TIME", "'1:2:3'"),
140+
        atimestamp=("TIMESTAMP", "'2021-09-06T09:57:26'"),
141+
    )
142+
    columns = ", ".join(f"{k} {t[0]}" for k, t in types.items())
143+
    dataset_client.query(f"create table {test_table_name} ({columns})").result()
144+
    if do_insert:
145+
        names = list(types)
146+
        values = ", ".join(types[name][1] for name in names)
147+
        names = ", ".join(names)
148+
        dataset_client.query(
149+
            f"insert into {test_table_name} ({names}) values ({values})"
150+
        ).result()
151+
    at = dataset_client.query(f"select * from {test_table_name}").result().to_arrow()
152+
    storage_api_metadata = {
153+
        at.field(i).name: at.field(i).metadata for i in range(at.num_columns)
154+
    }
155+
    at = (
156+
        dataset_client.query(f"select * from {test_table_name}")
157+
        .result()
158+
        .to_arrow(create_bqstorage_client=False)
159+
    )
160+
    rest_api_metadata = {
161+
        at.field(i).name: at.field(i).metadata for i in range(at.num_columns)
162+
    }
163+
164+
    assert rest_api_metadata == storage_api_metadata
165+
    assert rest_api_metadata["adatetime"] == {
166+
        b"ARROW:extension:name": b"google:sqlType:datetime"
167+
    }
168+
    assert rest_api_metadata["ageography"] == {
169+
        b"ARROW:extension:name": b"google:sqlType:geography",
170+
        b"ARROW:extension:metadata": b'{"encoding": "WKT"}',
171+
    }

‎tests/unit/test__pandas_helpers.py‎

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,3 +1696,26 @@ def test_bq_to_arrow_field_type_override(module_under_test):
16961696
        ).type
16971697
        == pyarrow.binary()
16981698
    )
1699+
1700+
1701+
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
1702+
@pytest.mark.parametrize(
1703+
    "field_type, metadata",
1704+
    [
1705+
        ("datetime", {b"ARROW:extension:name": b"google:sqlType:datetime"}),
1706+
        (
1707+
            "geography",
1708+
            {
1709+
                b"ARROW:extension:name": b"google:sqlType:geography",
1710+
                b"ARROW:extension:metadata": b'{"encoding": "WKT"}',
1711+
            },
1712+
        ),
1713+
    ],
1714+
)
1715+
def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata):
1716+
    assert (
1717+
        module_under_test.bq_to_arrow_field(
1718+
            schema.SchemaField("g", field_type)
1719+
        ).metadata
1720+
        == metadata
1721+
    )

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp