Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1c1791e

Browse files
committed
Add json(b)_to_tsvector function
Jsonb has a complex nature so there isn't best-for-everything way to convert itto tsvector for full text search. Current to_tsvector(json(b)) suggests toconvert only string values, but it's possible to index keys, numerics and evenbooleans value. To solve that json(b)_to_tsvector has a second requiredargument contained a list of desired types of json fields. Second argument isa jsonb scalar or array right now with possibility to add new options in afuture.Bump catalog versionAuthor: Dmitry Dolgov with some editorization by meReviewed by: Teodor SigaevDiscussion:https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
1 parent529ab7b commit1c1791e

File tree

10 files changed

+629
-49
lines changed

10 files changed

+629
-49
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9727,6 +9727,26 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
97279727
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
97289728
<entry><literal>'fat':2 'rat':3</literal></entry>
97299729
</row>
9730+
<row>
9731+
<entry>
9732+
<literal><function>json(b)_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type>,
9733+
</optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>,
9734+
<replaceable class="parameter">filter</replaceable> <type>json(b)</type>)</function></literal>
9735+
</entry>
9736+
<entry><type>tsvector</type></entry>
9737+
<entry>
9738+
reduce each value in the document, specified by <replaceable class="parameter">filter</replaceable> to a <type>tsvector</type>,
9739+
and then concatenate those in document order to produce a single <type>tsvector</type>.
9740+
<replaceable class="parameter">filter</replaceable> is a jsonb array, that enumerates what kind of elements need to be included
9741+
into the resulting <type>tsvector</type>. Possible values for <replaceable class="parameter">filter</replaceable> are
9742+
<literal>"string"</literal> (to include all string values), <literal>"numeric"</literal> (to include all numeric values in the string format),
9743+
<literal>"boolean"</literal> (to include all boolean values in the string format "true"/"false"),
9744+
<literal>"key"</literal> (to include all keys) or <literal>"all"</literal> (to include all above). These values
9745+
can be combined together to include, e.g. all string and numeric values.
9746+
</entry>
9747+
<entry><literal>json_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json, '["string", "numeric"]')</literal></entry>
9748+
<entry><literal>'123':5 'fat':2 'rat':3</literal></entry>
9749+
</row>
97309750
<row>
97319751
<entry>
97329752
<indexterm>

‎src/backend/tsearch/to_tsany.c

Lines changed: 108 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
267267
PointerGetDatum(in)));
268268
}
269269

270-
Datum
271-
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
270+
/*
271+
* Worker function for jsonb(_string)_to_tsvector(_byid)
272+
*/
273+
staticTSVector
274+
jsonb_to_tsvector_worker(OidcfgId,Jsonb*jb,uint32flags)
272275
{
273-
OidcfgId=PG_GETARG_OID(0);
274-
Jsonb*jb=PG_GETARG_JSONB_P(1);
275-
TSVectorresult;
276276
TSVectorBuildStatestate;
277277
ParsedTextprs;
278278

@@ -281,33 +281,77 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
281281
state.prs=&prs;
282282
state.cfgId=cfgId;
283283

284-
iterate_jsonb_string_values(jb,&state,add_to_tsvector);
284+
iterate_jsonb_values(jb,flags,&state,add_to_tsvector);
285285

286-
PG_FREE_IF_COPY(jb,1);
286+
returnmake_tsvector(&prs);
287+
}
288+
289+
Datum
290+
jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
291+
{
292+
OidcfgId=PG_GETARG_OID(0);
293+
Jsonb*jb=PG_GETARG_JSONB_P(1);
294+
TSVectorresult;
287295

288-
result=make_tsvector(&prs);
296+
result=jsonb_to_tsvector_worker(cfgId,jb,jtiString);
297+
PG_FREE_IF_COPY(jb,1);
289298

290299
PG_RETURN_TSVECTOR(result);
291300
}
292301

293302
Datum
294-
jsonb_to_tsvector(PG_FUNCTION_ARGS)
303+
jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
295304
{
296305
Jsonb*jb=PG_GETARG_JSONB_P(0);
297306
OidcfgId;
307+
TSVectorresult;
298308

299309
cfgId=getTSCurrentConfig(true);
300-
PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
301-
ObjectIdGetDatum(cfgId),
302-
JsonbPGetDatum(jb)));
310+
result=jsonb_to_tsvector_worker(cfgId,jb,jtiString);
311+
PG_FREE_IF_COPY(jb,0);
312+
313+
PG_RETURN_TSVECTOR(result);
303314
}
304315

305316
Datum
306-
json_to_tsvector_byid(PG_FUNCTION_ARGS)
317+
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
307318
{
308319
OidcfgId=PG_GETARG_OID(0);
309-
text*json=PG_GETARG_TEXT_P(1);
320+
Jsonb*jb=PG_GETARG_JSONB_P(1);
321+
Jsonb*jbFlags=PG_GETARG_JSONB_P(2);
322+
TSVectorresult;
323+
uint32flags=parse_jsonb_index_flags(jbFlags);
324+
325+
result=jsonb_to_tsvector_worker(cfgId,jb,flags);
326+
PG_FREE_IF_COPY(jb,1);
327+
PG_FREE_IF_COPY(jbFlags,2);
328+
329+
PG_RETURN_TSVECTOR(result);
330+
}
331+
332+
Datum
333+
jsonb_to_tsvector(PG_FUNCTION_ARGS)
334+
{
335+
Jsonb*jb=PG_GETARG_JSONB_P(0);
336+
Jsonb*jbFlags=PG_GETARG_JSONB_P(1);
337+
OidcfgId;
310338
TSVectorresult;
339+
uint32flags=parse_jsonb_index_flags(jbFlags);
340+
341+
cfgId=getTSCurrentConfig(true);
342+
result=jsonb_to_tsvector_worker(cfgId,jb,flags);
343+
PG_FREE_IF_COPY(jb,0);
344+
PG_FREE_IF_COPY(jbFlags,1);
345+
346+
PG_RETURN_TSVECTOR(result);
347+
}
348+
349+
/*
350+
* Worker function for json(_string)_to_tsvector(_byid)
351+
*/
352+
staticTSVector
353+
json_to_tsvector_worker(OidcfgId,text*json,uint32flags)
354+
{
311355
TSVectorBuildStatestate;
312356
ParsedTextprs;
313357

@@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
316360
state.prs=&prs;
317361
state.cfgId=cfgId;
318362

319-
iterate_json_string_values(json,&state,add_to_tsvector);
363+
iterate_json_values(json,flags,&state,add_to_tsvector);
364+
365+
returnmake_tsvector(&prs);
366+
}
367+
368+
Datum
369+
json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
370+
{
371+
OidcfgId=PG_GETARG_OID(0);
372+
text*json=PG_GETARG_TEXT_P(1);
373+
TSVectorresult;
320374

375+
result=json_to_tsvector_worker(cfgId,json,jtiString);
321376
PG_FREE_IF_COPY(json,1);
322377

323-
result=make_tsvector(&prs);
378+
PG_RETURN_TSVECTOR(result);
379+
}
380+
381+
Datum
382+
json_string_to_tsvector(PG_FUNCTION_ARGS)
383+
{
384+
text*json=PG_GETARG_TEXT_P(0);
385+
OidcfgId;
386+
TSVectorresult;
387+
388+
cfgId=getTSCurrentConfig(true);
389+
result=json_to_tsvector_worker(cfgId,json,jtiString);
390+
PG_FREE_IF_COPY(json,0);
391+
392+
PG_RETURN_TSVECTOR(result);
393+
}
394+
395+
Datum
396+
json_to_tsvector_byid(PG_FUNCTION_ARGS)
397+
{
398+
OidcfgId=PG_GETARG_OID(0);
399+
text*json=PG_GETARG_TEXT_P(1);
400+
Jsonb*jbFlags=PG_GETARG_JSONB_P(2);
401+
TSVectorresult;
402+
uint32flags=parse_jsonb_index_flags(jbFlags);
403+
404+
result=json_to_tsvector_worker(cfgId,json,flags);
405+
PG_FREE_IF_COPY(json,1);
406+
PG_FREE_IF_COPY(jbFlags,2);
324407

325408
PG_RETURN_TSVECTOR(result);
326409
}
@@ -329,12 +412,17 @@ Datum
329412
json_to_tsvector(PG_FUNCTION_ARGS)
330413
{
331414
text*json=PG_GETARG_TEXT_P(0);
415+
Jsonb*jbFlags=PG_GETARG_JSONB_P(1);
332416
OidcfgId;
417+
TSVectorresult;
418+
uint32flags=parse_jsonb_index_flags(jbFlags);
333419

334420
cfgId=getTSCurrentConfig(true);
335-
PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
336-
ObjectIdGetDatum(cfgId),
337-
PointerGetDatum(json)));
421+
result=json_to_tsvector_worker(cfgId,json,flags);
422+
PG_FREE_IF_COPY(json,0);
423+
PG_FREE_IF_COPY(jbFlags,1);
424+
425+
PG_RETURN_TSVECTOR(result);
338426
}
339427

340428
/*
@@ -353,7 +441,7 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
353441
* First time through: initialize words array to a reasonable size.
354442
* (parsetext() will realloc it bigger as needed.)
355443
*/
356-
prs->lenwords=Max(elem_len /6,64);
444+
prs->lenwords=16;
357445
prs->words= (ParsedWord*)palloc(sizeof(ParsedWord)*prs->lenwords);
358446
prs->curwords=0;
359447
prs->pos=0;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp