Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b487cf1

Browse files
authored
feat: add bigquery.json_keys (#2286)
1 parent 0cb5217 commit b487cf1

File tree

9 files changed

+146
-0
lines changed

9 files changed

+146
-0
lines changed

‎bigframes/bigquery/__init__.py‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
json_extract,
4848
json_extract_array,
4949
json_extract_string_array,
50+
json_keys,
5051
json_query,
5152
json_query_array,
5253
json_set,
@@ -138,6 +139,7 @@
138139
"json_extract",
139140
"json_extract_array",
140141
"json_extract_string_array",
142+
"json_keys",
141143
"json_query",
142144
"json_query_array",
143145
"json_set",

‎bigframes/bigquery/_operations/json.py‎

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,35 @@ def json_value_array(
421421
returninput._apply_unary_op(ops.JSONValueArray(json_path=json_path))
422422

423423

424+
defjson_keys(
425+
input:series.Series,
426+
max_depth:Optional[int]=None,
427+
)->series.Series:
428+
"""Returns all keys of a JSON object, including nested keys, as an ARRAY of STRINGs.
429+
430+
**Examples:**
431+
432+
>>> import bigframes.pandas as bpd
433+
>>> import bigframes.bigquery as bbq
434+
435+
>>> s = bpd.Series(['{"b": {"c": 2}, "a": 1}'], dtype="json")
436+
>>> bbq.json_keys(s)
437+
0 ['a' 'b' 'b.c']
438+
dtype: list<item: string>[pyarrow]
439+
440+
Args:
441+
input (bigframes.series.Series):
442+
The Series containing JSON data.
443+
max_depth (int, optional):
444+
Specifies the maximum depth of nested fields to search for keys. If not
445+
provided, searches for keys at all levels.
446+
447+
Returns:
448+
bigframes.series.Series: A new Series containing arrays of keys from the input JSON.
449+
"""
450+
returninput._apply_unary_op(ops.JSONKeys(max_depth=max_depth))
451+
452+
424453
defto_json(
425454
input:series.Series,
426455
)->series.Series:

‎bigframes/core/compile/ibis_compiler/scalar_op_registry.py‎

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,11 @@ def json_value_array_op_impl(x: ibis_types.Value, op: ops.JSONValueArray):
12341234
returnjson_value_array(json_obj=x,json_path=op.json_path)
12351235

12361236

1237+
@scalar_op_compiler.register_unary_op(ops.JSONKeys,pass_op=True)
1238+
defjson_keys_op_impl(x:ibis_types.Value,op:ops.JSONKeys):
1239+
returnjson_keys(x,op.max_depth)
1240+
1241+
12371242
# Blob Ops
12381243
@scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
12391244
defobj_fetch_metadata_op_impl(obj_ref:ibis_types.Value):
@@ -2059,6 +2064,14 @@ def to_json_string(value) -> ibis_dtypes.String: # type: ignore[empty-body]
20592064
"""Convert value to JSON-formatted string."""
20602065

20612066

2067+
@ibis_udf.scalar.builtin(name="json_keys")
2068+
defjson_keys(# type: ignore[empty-body]
2069+
json_obj:ibis_dtypes.JSON,
2070+
max_depth:ibis_dtypes.Int64,
2071+
)->ibis_dtypes.Array[ibis_dtypes.String]:
2072+
"""Extracts unique JSON keys from a JSON expression."""
2073+
2074+
20622075
@ibis_udf.scalar.builtin(name="json_value")
20632076
defjson_value(# type: ignore[empty-body]
20642077
json_obj:ibis_dtypes.JSON,json_path:ibis_dtypes.String

‎bigframes/core/compile/sqlglot/expressions/json_ops.py‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ def _(expr: TypedExpr, op: ops.JSONExtractStringArray) -> sge.Expression:
3939
returnsge.func("JSON_EXTRACT_STRING_ARRAY",expr.expr,sge.convert(op.json_path))
4040

4141

42+
@register_unary_op(ops.JSONKeys,pass_op=True)
43+
def_(expr:TypedExpr,op:ops.JSONKeys)->sge.Expression:
44+
returnsge.func("JSON_KEYS",expr.expr,sge.convert(op.max_depth))
45+
46+
4247
@register_unary_op(ops.JSONQuery,pass_op=True)
4348
def_(expr:TypedExpr,op:ops.JSONQuery)->sge.Expression:
4449
returnsge.func("JSON_QUERY",expr.expr,sge.convert(op.json_path))

‎bigframes/operations/__init__.py‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
JSONExtract,
129129
JSONExtractArray,
130130
JSONExtractStringArray,
131+
JSONKeys,
131132
JSONQuery,
132133
JSONQueryArray,
133134
JSONSet,
@@ -381,6 +382,7 @@
381382
"JSONExtract",
382383
"JSONExtractArray",
383384
"JSONExtractStringArray",
385+
"JSONKeys",
384386
"JSONQuery",
385387
"JSONQueryArray",
386388
"JSONSet",

‎bigframes/operations/json_ops.py‎

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,23 @@ def output_type(self, *input_types):
199199
returninput_type
200200

201201

202+
@dataclasses.dataclass(frozen=True)
203+
classJSONKeys(base_ops.UnaryOp):
204+
name:typing.ClassVar[str]="json_keys"
205+
max_depth:typing.Optional[int]=None
206+
207+
defoutput_type(self,*input_types):
208+
input_type=input_types[0]
209+
ifinput_type!=dtypes.JSON_DTYPE:
210+
raiseTypeError(
211+
"Input type must be a valid JSON object or JSON-formatted string type."
212+
+f" Received type:{input_type}"
213+
)
214+
returnpd.ArrowDtype(
215+
pa.list_(dtypes.bigframes_dtype_to_arrow_dtype(dtypes.STRING_DTYPE))
216+
)
217+
218+
202219
@dataclasses.dataclass(frozen=True)
203220
classJSONDecode(base_ops.UnaryOp):
204221
name:typing.ClassVar[str]="json_decode"

‎tests/system/small/bigquery/test_json.py‎

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,3 +434,53 @@ def test_to_json_string_from_struct():
434434
)
435435

436436
pd.testing.assert_series_equal(actual.to_pandas(),expected.to_pandas())
437+
438+
439+
deftest_json_keys():
440+
json_data= [
441+
'{"name": "Alice", "age": 30}',
442+
'{"city": "New York", "country": "USA", "active": true}',
443+
"{}",
444+
'{"items": [1, 2, 3]}',
445+
]
446+
s=bpd.Series(json_data,dtype=dtypes.JSON_DTYPE)
447+
actual=bbq.json_keys(s)
448+
449+
expected_data_pandas= [
450+
["age","name"],
451+
[
452+
"active",
453+
"city",
454+
"country",
455+
],
456+
[],
457+
["items"],
458+
]
459+
expected=bpd.Series(
460+
expected_data_pandas,dtype=pd.ArrowDtype(pa.list_(pa.string()))
461+
)
462+
pd.testing.assert_series_equal(actual.to_pandas(),expected.to_pandas())
463+
464+
465+
deftest_json_keys_with_max_depth():
466+
json_data= [
467+
'{"user": {"name": "Bob", "details": {"id": 123, "status": "approved"}}}',
468+
'{"user": {"name": "Charlie"}}',
469+
]
470+
s=bpd.Series(json_data,dtype=dtypes.JSON_DTYPE)
471+
actual=bbq.json_keys(s,max_depth=2)
472+
473+
expected_data_pandas= [
474+
["user","user.details","user.name"],
475+
["user","user.name"],
476+
]
477+
expected=bpd.Series(
478+
expected_data_pandas,dtype=pd.ArrowDtype(pa.list_(pa.string()))
479+
)
480+
pd.testing.assert_series_equal(actual.to_pandas(),expected.to_pandas())
481+
482+
483+
deftest_json_keys_from_string_error():
484+
s=bpd.Series(['{"a": 1, "b": 2}','{"c": 3}'])
485+
withpytest.raises(TypeError):
486+
bbq.json_keys(s)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH`bfcte_0`AS (
2+
SELECT
3+
`json_col`
4+
FROM`bigframes-dev`.`sqlglot_test`.`json_types`
5+
),`bfcte_1`AS (
6+
SELECT
7+
*,
8+
JSON_KEYS(`json_col`,NULL)AS`bfcol_1`,
9+
JSON_KEYS(`json_col`,2)AS`bfcol_2`
10+
FROM`bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_1`AS`json_keys`,
14+
`bfcol_2`AS`json_keys_w_max_depth`
15+
FROM`bfcte_1`

‎tests/unit/core/compile/sqlglot/expressions/test_json_ops.py‎

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
5252
snapshot.assert_match(sql,"out.sql")
5353

5454

55+
deftest_json_keys(json_types_df:bpd.DataFrame,snapshot):
56+
col_name="json_col"
57+
bf_df=json_types_df[[col_name]]
58+
59+
ops_map= {
60+
"json_keys":ops.JSONKeys().as_expr(col_name),
61+
"json_keys_w_max_depth":ops.JSONKeys(max_depth=2).as_expr(col_name),
62+
}
63+
64+
sql=utils._apply_ops_to_sql(bf_df,list(ops_map.values()),list(ops_map.keys()))
65+
snapshot.assert_match(sql,"out.sql")
66+
67+
5568
deftest_json_query(json_types_df:bpd.DataFrame,snapshot):
5669
col_name="json_col"
5770
bf_df=json_types_df[[col_name]]

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp