Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 348104a

Browse files
author
Arseny Kositsyn
committed
[PGPRO-12159] Added the output of tsv lexemes positions.
If you create an index with the operator class rum_tsvector_ops, the positions of the lexemes will be saved as additional information. The positions are stored in compressed form in bytea. There is a problem related to the fact that in the posting tree, additional information for the senior keys is stored in a different way, which is why it has not yet been possible to output it. For all other cases, the output of additional information works correctly. Tags: rum
1 parent 18562e2 commit 348104a

File tree

3 files changed

+182
-38
lines changed

3 files changed

+182
-38
lines changed

‎src/rum.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include"storage/bufmgr.h"
2222
#include"utils/datum.h"
2323
#include"utils/memutils.h"
24+
#include"tsearch/ts_type.h"
2425

2526
#include"rumsort.h"
2627

@@ -836,6 +837,8 @@ extern RumItem *rumGetBAEntry(BuildAccumulator *accum,
836837
#defineRUM_ADDINFO_JOIN10
837838
#defineRUMNProcs10
838839

840+
#defineLOWERMASK 0x1F
841+
839842
externPGDLLEXPORTDatumrum_extract_tsvector(PG_FUNCTION_ARGS);
840843
externPGDLLEXPORTDatumrum_extract_tsquery(PG_FUNCTION_ARGS);
841844
externPGDLLEXPORTDatumrum_tsvector_config(PG_FUNCTION_ARGS);
@@ -847,6 +850,9 @@ extern PGDLLEXPORT Datum rum_ts_distance_td(PG_FUNCTION_ARGS);
847850

848851
externPGDLLEXPORTDatumtsquery_to_distance_query(PG_FUNCTION_ARGS);
849852

853+
externchar*decompress_pos(char*ptr,WordEntryPos*pos);
854+
externunsignedintcount_pos(char*ptr,intlen);
855+
850856
/* rum_arr_utils.c */
851857
typedefenumSimilarityType
852858
{

‎src/rum_debug_funcs.c

Lines changed: 174 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include"access/relation.h"
2828
#include"utils/varlena.h"
2929
#include"rum.h"
30+
#include"tsearch/ts_type.h"
3031

3132
PG_FUNCTION_INFO_V1(rum_metapage_info);
3233
PG_FUNCTION_INFO_V1(rum_page_opaque_info);
@@ -115,6 +116,8 @@ static Datum category_get_datum_text(RumNullCategory category);
115116
staticOidfind_add_info_oid(RumState*rum_state_ptr);
116117
staticOffsetNumberfind_add_info_atrr_num(RumState*rum_state_ptr);
117118

119+
staticDatumget_positions_to_text_datum(Datumadd_info);
120+
118121
/*
119122
* The rum_metapage_info() function is used to retrieve
120123
* information stored on the meta page of the rum index.
@@ -386,12 +389,6 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
386389
/* Allocating memory for a long-lived structure */
387390
inter_call_data=palloc(sizeof(rum_page_items_state));
388391

389-
/* Initializing the RumState structure */
390-
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
391-
initRumState(inter_call_data->rum_state_ptr,rel);
392-
393-
relation_close(rel,AccessShareLock);
394-
395392
/* Getting a copy of the page from the raw page */
396393
page=get_page_from_raw(raw_page);
397394

@@ -422,6 +419,12 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
422419
errdetail("Flags %04X, expected %04X",
423420
opaq->flags, (RUM_DATA |RUM_LEAF))));
424421

422+
/* Initializing the RumState structure */
423+
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
424+
initRumState(inter_call_data->rum_state_ptr,rel);
425+
426+
relation_close(rel,AccessShareLock);
427+
425428
/* Build a tuple descriptor for our result type */
426429
if (get_call_result_type(fcinfo,NULL,&tupdesc)!=TYPEFUNC_COMPOSITE)
427430
elog(ERROR,"return type must be a row type");
@@ -494,9 +497,24 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
494497
values[2]=BoolGetDatum(high_key_ptr->addInfoIsNull);
495498

496499
/* Returning add info */
497-
if(!high_key_ptr->addInfoIsNull&&inter_call_data->add_info_oid!=0)
500+
if(!(high_key_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
501+
&&inter_call_data->add_info_oid!=BYTEAOID)
502+
{
498503
values[3]=get_datum_text_by_oid(high_key_ptr->addInfo,
499504
inter_call_data->add_info_oid);
505+
}
506+
507+
/*
508+
* In this case, we are dealing with the positions
509+
* of tokens and they need to be decoded.
510+
*/
511+
elseif (!(high_key_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
512+
&&inter_call_data->add_info_oid==BYTEAOID)
513+
{
514+
/* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515+
values[3]=CStringGetTextDatum("high key positions in posting tree is not supported");
516+
}
517+
500518
elsenulls[3]= true;
501519

502520
/* Forming the returned tuple */
@@ -536,8 +554,23 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
536554
values[2]=BoolGetDatum(rum_item_ptr->addInfoIsNull);
537555

538556
/* Returning add info */
539-
if(!(rum_item_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0)
540-
values[3]=get_datum_text_by_oid(rum_item_ptr->addInfo,inter_call_data->add_info_oid);
557+
if(!(rum_item_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
558+
&&inter_call_data->add_info_oid!=BYTEAOID)
559+
{
560+
values[3]=get_datum_text_by_oid(rum_item_ptr->addInfo,
561+
inter_call_data->add_info_oid);
562+
}
563+
564+
/*
565+
* In this case, we are dealing with the positions
566+
* of tokens and they need to be decoded.
567+
*/
568+
elseif (!(rum_item_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
569+
&&inter_call_data->add_info_oid==BYTEAOID)
570+
{
571+
values[3]=get_positions_to_text_datum(rum_item_ptr->addInfo);
572+
}
573+
541574
elsenulls[3]= true;
542575

543576
/* Forming the returned tuple */
@@ -619,12 +652,6 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
619652
/* Allocating memory for a long-lived structure */
620653
inter_call_data=palloc(sizeof(rum_page_items_state));
621654

622-
/* Initializing the RumState structure */
623-
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
624-
initRumState(inter_call_data->rum_state_ptr,rel);
625-
626-
relation_close(rel,AccessShareLock);
627-
628655
/* Getting a copy of the page from the raw page */
629656
page=get_page_from_raw(raw_page);
630657

@@ -655,6 +682,12 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
655682
errdetail("Flags %04X, expected %04X",
656683
opaq->flags, (RUM_DATA& ~RUM_LEAF))));
657684

685+
/* Initializing the RumState structure */
686+
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
687+
initRumState(inter_call_data->rum_state_ptr,rel);
688+
689+
relation_close(rel,AccessShareLock);
690+
658691
/* Build a tuple descriptor for our result type */
659692
if (get_call_result_type(fcinfo,NULL,&tupdesc)!=TYPEFUNC_COMPOSITE)
660693
elog(ERROR,"return type must be a row type");
@@ -721,9 +754,24 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
721754
values[3]=BoolGetDatum(high_key_ptr->addInfoIsNull);
722755

723756
/* Returning add info */
724-
if(!high_key_ptr->addInfoIsNull&&inter_call_data->add_info_oid!=0)
757+
if(!(high_key_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
758+
&&inter_call_data->add_info_oid!=BYTEAOID)
759+
{
725760
values[4]=get_datum_text_by_oid(high_key_ptr->addInfo,
726761
inter_call_data->add_info_oid);
762+
}
763+
764+
/*
765+
* In this case, we are dealing with the positions
766+
* of tokens and they need to be decoded.
767+
*/
768+
elseif (!(high_key_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
769+
&&inter_call_data->add_info_oid==BYTEAOID)
770+
{
771+
/* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772+
values[4]=CStringGetTextDatum("high key positions in posting tree is not supported");
773+
}
774+
727775
elsenulls[4]= true;
728776

729777
/* Forming the returned tuple */
@@ -745,9 +793,24 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
745793
values[3]=BoolGetDatum(posting_item_ptr->item.addInfoIsNull);
746794

747795
/* Returning add info */
748-
if(!posting_item_ptr->item.addInfoIsNull&&inter_call_data->add_info_oid!=0)
796+
if(!posting_item_ptr->item.addInfoIsNull&&inter_call_data->add_info_oid!=0
797+
&&inter_call_data->add_info_oid!=BYTEAOID)
798+
{
749799
values[4]=get_datum_text_by_oid(posting_item_ptr->item.addInfo,
750800
inter_call_data->add_info_oid);
801+
}
802+
803+
/*
804+
* In this case, we are dealing with the positions
805+
* of tokens and they need to be decoded.
806+
*/
807+
elseif (!posting_item_ptr->item.addInfoIsNull&&inter_call_data->add_info_oid!=0
808+
&&inter_call_data->add_info_oid==BYTEAOID)
809+
{
810+
/* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811+
values[4]=CStringGetTextDatum("high key positions in posting tree is not supported");
812+
}
813+
751814
elsenulls[4]= true;
752815

753816
/* Forming the returned tuple */
@@ -833,12 +896,6 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
833896
/* Allocating memory for a long-lived structure */
834897
inter_call_data=palloc(sizeof(rum_page_items_state));
835898

836-
/* Initializing the RumState structure */
837-
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
838-
initRumState(inter_call_data->rum_state_ptr,rel);
839-
840-
relation_close(rel,AccessShareLock);
841-
842899
/* Getting a copy of the page from the raw page */
843900
page=get_page_from_raw(raw_page);
844901

@@ -869,6 +926,12 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
869926
errdetail("Flags %04X, expected %04X",
870927
opaq->flags,RUM_LEAF)));
871928

929+
/* Initializing the RumState structure */
930+
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
931+
initRumState(inter_call_data->rum_state_ptr,rel);
932+
933+
relation_close(rel,AccessShareLock);
934+
872935
/* Build a tuple descriptor for our result type */
873936
if (get_call_result_type(fcinfo,NULL,&tupdesc)!=TYPEFUNC_COMPOSITE)
874937
elog(ERROR,"return type must be a row type");
@@ -1008,10 +1071,23 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10081071
values[3]=ItemPointerGetDatum(&(rum_item_ptr->iptr));
10091072
values[4]=BoolGetDatum(rum_item_ptr->addInfoIsNull);
10101073

1011-
10121074
/* Returning add info */
1013-
if(!(rum_item_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0)
1075+
if (!(rum_item_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0&&
1076+
inter_call_data->add_info_oid!=BYTEAOID)
1077+
{
10141078
values[5]=get_datum_text_by_oid(rum_item_ptr->addInfo,inter_call_data->add_info_oid);
1079+
}
1080+
1081+
/*
1082+
* In this case, we are dealing with the positions
1083+
* of tokens and they need to be decoded.
1084+
*/
1085+
elseif (!(rum_item_ptr->addInfoIsNull)&&inter_call_data->add_info_oid!=0
1086+
&&inter_call_data->add_info_oid==BYTEAOID)
1087+
{
1088+
values[5]=get_positions_to_text_datum(rum_item_ptr->addInfo);
1089+
}
1090+
10151091
elsenulls[5]= true;
10161092

10171093
/* The current IndexTuple does not contain a posting tree */
@@ -1101,12 +1177,6 @@ rum_internal_entry_page_items(PG_FUNCTION_ARGS)
11011177
/* Allocating memory for a long-lived structure */
11021178
inter_call_data=palloc(sizeof(rum_page_items_state));
11031179

1104-
/* Initializing the RumState structure */
1105-
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
1106-
initRumState(inter_call_data->rum_state_ptr,rel);
1107-
1108-
relation_close(rel,AccessShareLock);
1109-
11101180
/* Getting a copy of the page from the raw page */
11111181
page=get_page_from_raw(raw_page);
11121182

@@ -1137,6 +1207,12 @@ rum_internal_entry_page_items(PG_FUNCTION_ARGS)
11371207
errdetail("Flags %04X, expected %04X",
11381208
opaq->flags,0)));
11391209

1210+
/* Initializing the RumState structure */
1211+
inter_call_data->rum_state_ptr=palloc(sizeof(RumState));
1212+
initRumState(inter_call_data->rum_state_ptr,rel);
1213+
1214+
relation_close(rel,AccessShareLock);
1215+
11401216
/* Build a tuple descriptor for our result type */
11411217
if (get_call_result_type(fcinfo,NULL,&tupdesc)!=TYPEFUNC_COMPOSITE)
11421218
elog(ERROR,"return type must be a row type");
@@ -1355,7 +1431,7 @@ get_page_from_raw(bytea *raw_page)
13551431
* TODO: All types accepted by rum must be checked, but
13561432
* perhaps some types are missing or some are superfluous.
13571433
*/
1358-
staticDatum
1434+
staticDatum
13591435
get_datum_text_by_oid(Datuminfo,Oidinfo_oid)
13601436
{
13611437
char*str_info=NULL;
@@ -1602,3 +1678,69 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16021678
/* Need to add 1 because the attributes are numbered from 1 */
16031679
returnadd_info_attr_num+1;
16041680
}
1681+
1682+
#definePOS_STR_BUF_LENGHT 1024
1683+
#definePOS_MAX_VAL_LENGHT 6
1684+
1685+
/*
1686+
* Decode the compressed lexeme positions held in the bytea add_info and
1687+
* return them as a text Datum of comma-separated decimal values (empty
1688+
* text when count_pos() reports none). The scratch C string is pfree'd here.
1689+
*/
1690+
staticDatum
1691+
get_positions_to_text_datum(Datumadd_info)
1692+
{
1693+
bytea*positions;
1694+
char*ptrt;
1695+
WordEntryPosposition=0;
1696+
int32npos;
1697+
1698+
Datumres;
1699+
char*positions_str;
1700+
char*positions_str_cur_ptr;
1701+
intcur_max_str_lenght;
1702+
1703+
positions=DatumGetByteaP(add_info);
1704+
ptrt= (char*)VARDATA_ANY(positions);
1705+
npos=count_pos(VARDATA_ANY(positions),
1706+
VARSIZE_ANY_EXHDR(positions));
1707+
1708+
/* Start with an empty string in a POS_STR_BUF_LENGHT-byte buffer */
1709+
positions_str= (char*)palloc(POS_STR_BUF_LENGHT*sizeof(char));
1710+
positions_str[0]='\0';
1711+
cur_max_str_lenght=POS_STR_BUF_LENGHT;
1712+
positions_str_cur_ptr=positions_str;
1713+
1714+
/* Decode npos positions one by one and append each to the string */
1715+
for (inti=0;i<npos;i++)
1716+
{
1717+
/* decompress_pos() returns the read pointer to continue from */
1718+
ptrt=decompress_pos(ptrt,&position);
1719+
1720+
/* Append the position followed by a comma at the write pointer */
1721+
sprintf(positions_str_cur_ptr,"%d,",position);
1722+
1723+
/* Advance the write pointer past the text just written */
1724+
positions_str_cur_ptr+=strlen(positions_str_cur_ptr);
1725+
1726+
/*
1727+
* When no more than POS_MAX_VAL_LENGHT bytes remain free in the
1728+
* buffer, enlarge it by POS_STR_BUF_LENGHT bytes and re-derive the
1729+
* write pointer from the buffer returned by repalloc().
* NOTE(review): the sprintf above is unbounded; this margin presumably
* assumes a position prints as at most POS_MAX_VAL_LENGHT characters
* including the comma - confirm against the range of WordEntryPos.
1730+
*/
1731+
if (cur_max_str_lenght- (positions_str_cur_ptr-positions_str) <=POS_MAX_VAL_LENGHT)
1732+
{
1733+
cur_max_str_lenght+=POS_STR_BUF_LENGHT;
1734+
positions_str= (char*)repalloc(positions_str,cur_max_str_lenght*sizeof(char));
1735+
positions_str_cur_ptr=positions_str+strlen(positions_str);
1736+
}
1737+
}
1738+
1739+
/* Drop the trailing comma if at least one position was written */
1740+
if (npos>0)
1741+
positions_str[strlen(positions_str)-1]='\0';
1742+
1743+
res=CStringGetTextDatum(positions_str);
1744+
pfree(positions_str);
1745+
returnres;
1746+
}

‎src/rum_ts_utils.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include"catalog/pg_type.h"
1717
#include"funcapi.h"
1818
#include"miscadmin.h"
19-
#include"tsearch/ts_type.h"
2019
#include"tsearch/ts_utils.h"
2120
#include"utils/array.h"
2221
#include"utils/builtins.h"
@@ -80,8 +79,6 @@ PG_FUNCTION_INFO_V1(rum_ts_join_pos);
8079

8180
PG_FUNCTION_INFO_V1(tsquery_to_distance_query);
8281

83-
staticunsignedintcount_pos(char*ptr,intlen);
84-
staticchar*decompress_pos(char*ptr,WordEntryPos*pos);
8582
staticDatumbuild_tsvector_entry(TSVectorvector,WordEntry*we);
8683
staticDatumbuild_tsvector_hash_entry(TSVectorvector,WordEntry*we);
8784
staticDatumbuild_tsquery_entry(TSQueryquery,QueryOperand*operand);
@@ -964,7 +961,6 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS)
964961
}
965962

966963
#defineSIXTHBIT 0x20
967-
#defineLOWERMASK 0x1F
968964

969965
staticunsignedint
970966
compress_pos(char*target,WordEntryPos*pos,intnpos)
@@ -999,7 +995,7 @@ compress_pos(char *target, WordEntryPos *pos, int npos)
999995
returnptr-target;
1000996
}
1001997

1002-
staticchar*
998+
externchar*
1003999
decompress_pos(char*ptr,WordEntryPos*pos)
10041000
{
10051001
inti;
@@ -1027,7 +1023,7 @@ decompress_pos(char *ptr, WordEntryPos *pos)
10271023
}
10281024
}
10291025

1030-
staticunsignedint
1026+
externunsignedint
10311027
count_pos(char*ptr,intlen)
10321028
{
10331029
intcount=0,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp