1414 * 2) I/O functions were not available for all types in
1515 * the get_datum_text_by_oid() function.
1616 *
17- * 3) SIGSEGV in case of bytea output as additional information.
17+ * 3) The output of lexeme positions in the high keys of the posting
18+ * tree is not supported.
1819 */
1920
2021#include "postgres.h"
22+ #include "miscadmin.h"
2123#include "fmgr.h"
2224#include "funcapi.h"
2325#include "catalog/namespace.h"
@@ -115,8 +117,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115117static Datum category_get_datum_text (RumNullCategory category );
116118static Oid find_add_info_oid (RumState * rum_state_ptr );
117119static OffsetNumber find_add_info_atrr_num (RumState * rum_state_ptr );
118-
119120static Datum get_positions_to_text_datum (Datum add_info );
121+ static char pos_get_weight (WordEntryPos position );
120122
121123/*
122124 * The rum_metapage_info() function is used to retrieve
@@ -472,7 +474,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472474 */
473475if (fctx -> call_cntr <=inter_call_data -> maxoff )
474476{
475- RumItem * high_key_ptr ;
477+ RumItem * high_key_ptr ;/* to read high key from a page */
476478RumItem * rum_item_ptr ;/* to read data from a page */
477479Datum values [4 ];/* return values */
478480bool nulls [4 ];/* true if the corresponding value is NULL */
@@ -497,7 +499,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497499values [2 ]= BoolGetDatum (high_key_ptr -> addInfoIsNull );
498500
499501/* Returning add info */
500- if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
502+ if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
501503&& inter_call_data -> add_info_oid != BYTEAOID )
502504{
503505values [3 ]= get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -506,12 +508,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506508
507509/*
508510 * In this case, we are dealing with the positions
509- * oftokens and they need to be decoded.
511+ * of lexemes and they need to be decoded.
510512 */
511- else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
513+ else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
512514&& inter_call_data -> add_info_oid == BYTEAOID )
513515{
514- /* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515516values [3 ]= CStringGetTextDatum ("high key positions in posting tree is not supported" );
516517}
517518
@@ -525,26 +526,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525526SRF_RETURN_NEXT (fctx ,result );
526527}
527528
528- /*
529- * Reading information from the page in rum_item.
530- *
531- * TODO: The fact is that being on the posting tree page, we don't know which
532- * index attribute this posting tree was built for, so we don't know the
533- * attribute number of the additional information. But the rumDataPageLeafRead()
534- * function requires it to read information from the page. Here we use the auxiliary
535- * function find_add_info_atr_num(), which simply iterates through the array with
536- * attributes that are additional information and selects the attribute number for
537- * which the additional information attribute is not NULL. This approach is incorrect
538- * because there may not be additional information for the attribute on the page,
539- * but we hope that in this case add_info_is_null will have the value true and the
540- * additional information will not be read.
541- *
542- * This problem can be solved by asking the user for the attribute number of
543- * additional information, because going through the index from top to bottom,
544- * he saw it next to the link to the posting tree root.
545- */
529+ /* Reading information from the page in rum_item */
546530inter_call_data -> item_ptr = rumDataPageLeafRead (inter_call_data -> item_ptr ,
547- /* inter_call_data->cur_tuple_key_attnum, */
548531find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549532rum_item_ptr , false,inter_call_data -> rum_state_ptr );
550533
@@ -554,7 +537,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554537values [2 ]= BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555538
556539/* Returning add info */
557- if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
540+ if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
558541&& inter_call_data -> add_info_oid != BYTEAOID )
559542{
560543values [3 ]= get_datum_text_by_oid (rum_item_ptr -> addInfo ,
@@ -563,9 +546,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563546
564547/*
565548 * In this case, we are dealing with the positions
566- * oftokens and they need to be decoded.
549+ * of lexemes and they need to be decoded.
567550 */
568- else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
551+ else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
569552&& inter_call_data -> add_info_oid == BYTEAOID )
570553{
571554values [3 ]= get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -729,7 +712,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729712 */
730713if (fctx -> call_cntr <=inter_call_data -> maxoff )
731714{
732- RumItem * high_key_ptr ;
715+ RumItem * high_key_ptr ;/* to read high key from a page */
733716PostingItem * posting_item_ptr ;/* to read data from a page */
734717Datum values [5 ];/* returned values */
735718bool nulls [5 ];/* true if the corresponding returned value is NULL */
@@ -754,7 +737,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754737values [3 ]= BoolGetDatum (high_key_ptr -> addInfoIsNull );
755738
756739/* Returning add info */
757- if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
740+ if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
758741&& inter_call_data -> add_info_oid != BYTEAOID )
759742{
760743values [4 ]= get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -763,12 +746,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763746
764747/*
765748 * In this case, we are dealing with the positions
766- * oftokens and they need to be decoded.
749+ * of lexemes and they need to be decoded.
767750 */
768- else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
751+ else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
769752&& inter_call_data -> add_info_oid == BYTEAOID )
770753{
771- /* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772754values [4 ]= CStringGetTextDatum ("high key positions in posting tree is not supported" );
773755}
774756
@@ -793,7 +775,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793775values [3 ]= BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794776
795777/* Returning add info */
796- if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
778+ if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
797779&& inter_call_data -> add_info_oid != BYTEAOID )
798780{
799781values [4 ]= get_datum_text_by_oid (posting_item_ptr -> item .addInfo ,
@@ -802,12 +784,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802784
803785/*
804786 * In this case, we are dealing with the positions
805- * oftokens and they need to be decoded.
787+ * of lexemes and they need to be decoded.
806788 */
807- else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
789+ else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
808790&& inter_call_data -> add_info_oid == BYTEAOID )
809791{
810- /* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811792values [4 ]= CStringGetTextDatum ("high key positions in posting tree is not supported" );
812793}
813794
@@ -1072,17 +1053,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721053values [4 ]= BoolGetDatum (rum_item_ptr -> addInfoIsNull );
10731054
10741055/* Returning add info */
1075- if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0 &&
1056+ if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid &&
10761057inter_call_data -> add_info_oid != BYTEAOID )
10771058{
10781059values [5 ]= get_datum_text_by_oid (rum_item_ptr -> addInfo ,inter_call_data -> add_info_oid );
10791060}
10801061
10811062/*
10821063 * In this case, we are dealing with the positions
1083- * oftokens and they need to be decoded.
1064+ * of lexemes and they need to be decoded.
10841065 */
1085- else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
1066+ else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
10861067&& inter_call_data -> add_info_oid == BYTEAOID )
10871068{
10881069values [5 ]= get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -1427,22 +1408,16 @@ get_page_from_raw(bytea *raw_page)
14271408 * int2, int4, int8, float4, float8, money, oid, timestamp,
14281409 * timestamptz, time, timetz, date, interval, macaddr, inet,
14291410 * cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430- *
1431- * TODO: All types accepted by rum must be checked, but
1432- * perhaps some types are missing or some are superfluous.
14331411 */
14341412static Datum
14351413get_datum_text_by_oid (Datum info ,Oid info_oid )
14361414{
14371415char * str_info = NULL ;
14381416
1439- /* info cannot be NULL */
1440- Assert (DatumGetPointer (info )!= NULL );
1441-
14421417/*
14431418 * Form a string depending on the type of info.
14441419 *
1445- *FIXME : The macros used below are taken from the
1420+ *TODO : The macros used below are taken from the
14461421 * pg_type_d file.h, and it says not to use them
14471422 * in the new code.
14481423 */
@@ -1528,18 +1503,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281503str_info = OidOutputFunctionCall (F_CHAROUT ,info );
15291504break ;
15301505
1531- /*
1532- * TODO: For some reason, the rum index created for a single tsv
1533- * field contains additional information as bytea. In addition,
1534- * if additional information in this format is extracted from
1535- * posting tree pages, it cannot be displayed correctly as text.
1536- * If the additional information was extracted from the entry
1537- * tree pages, then it is displayed correctly.
1538- */
15391506case BYTEAOID :
1540- /* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541- /* break; */
1542- return CStringGetTextDatum ("BYTEAOID is not supported" );
1507+ str_info = OidOutputFunctionCall (F_BYTEAOUT ,info );
1508+ break ;
15431509
15441510case BITOID :
15451511str_info = OidOutputFunctionCall (F_BIT_OUT ,info );
@@ -1634,14 +1600,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341600 * the Oid of additional information for an attribute for
16351601 * which it is not NULL.
16361602 *
1637- *TODO: The logic of the function assumes that there cannot
1603+ * The logic of the function assumes that there cannot
16381604 * be several types of additional information in the index,
16391605 * otherwise it will not work.
16401606 */
16411607static Oid
16421608find_add_info_oid (RumState * rum_state_ptr )
16431609{
1644- Oid add_info_oid = 0 ;
1610+ Oid add_info_oid = InvalidOid ;
16451611
16461612/* Number of index attributes */
16471613int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1617,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511617 * oid of additional information.
16521618 */
16531619for (int i = 0 ;i < num_attrs ;i ++ )
1620+ {
16541621if ((rum_state_ptr -> addAttrs )[i ]!= NULL )
1622+ {
1623+ Assert (add_info_oid == InvalidOid );
16551624add_info_oid = ((rum_state_ptr -> addAttrs )[i ])-> atttypid ;
1625+ }
1626+ }
16561627
16571628return add_info_oid ;
16581629}
@@ -1661,19 +1632,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611632 * This is an auxiliary function to get the attribute number
16621633 * for additional information. It is used in the rum_leaf_data_page_items()
16631634 * function to call the rumDataPageLeafRead() function.
1635+ *
1636+ * The logic of the function assumes that there cannot
1637+ * be several types of additional information in the index,
1638+ * otherwise it will not work.
16641639 */
16651640static OffsetNumber
16661641find_add_info_atrr_num (RumState * rum_state_ptr )
16671642{
1668- OffsetNumber add_info_attr_num = 0 ;
1643+ OffsetNumber add_info_attr_num = InvalidOffsetNumber ;
16691644
16701645/* Number of index attributes */
16711646int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
16721647
16731648/* Go through the addAttrs array */
1674- for (int i = 0 ;i < num_attrs ;i ++ )
1649+ for (int i = 0 ;i < num_attrs ;i ++ )
1650+ {
16751651if ((rum_state_ptr -> addAttrs )[i ]!= NULL )
1652+ {
1653+ Assert (add_info_attr_num == InvalidOffsetNumber );
16761654add_info_attr_num = i ;
1655+ }
1656+ }
16771657
16781658/* Need to add 1 because the attributes are numbered from 1 */
16791659return add_info_attr_num + 1 ;
@@ -1683,8 +1663,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831663#define POS_MAX_VAL_LENGHT 6
16841664
16851665/*
1686- * A function for extracting the positions oftokens from additional
1687- * information. Returns a string in which the positions of thetokens
1666+ * A function for extracting the positions of lexemes from additional
1667+ * information. Returns a string in which the positions of the lexemes
16881668 * are recorded. The memory that the string occupies must be cleared later.
16891669 */
16901670static Datum
@@ -1711,14 +1691,17 @@ get_positions_to_text_datum(Datum add_info)
17111691cur_max_str_lenght = POS_STR_BUF_LENGHT ;
17121692positions_str_cur_ptr = positions_str ;
17131693
1714- /* Extract the positions of thetokens and put them in the string */
1694+ /* Extract the positions of the lexemes and put them in the string */
17151695for (int i = 0 ;i < npos ;i ++ )
17161696{
17171697/* At each iteration decode the position */
17181698ptrt = decompress_pos (ptrt ,& position );
17191699
1720- /* Write this position in the string */
1721- sprintf (positions_str_cur_ptr ,"%d," ,position );
1700+ /* Write this position and weight in the string */
1701+ if (pos_get_weight (position )== 'D' )
1702+ sprintf (positions_str_cur_ptr ,"%d," ,WEP_GETPOS (position ));
1703+ else
1704+ sprintf (positions_str_cur_ptr ,"%d%c," ,WEP_GETPOS (position ),pos_get_weight (position ));
17221705
17231706/* Moving the pointer forward */
17241707positions_str_cur_ptr += strlen (positions_str_cur_ptr );
@@ -1744,3 +1727,25 @@ get_positions_to_text_datum(Datum add_info)
17441727pfree (positions_str );
17451728return res ;
17461729}
1730+
1731+ /*
1732+ * The function extracts the weight and
1733+ * returns the corresponding letter.
1734+ */
1735+ static char
1736+ pos_get_weight (WordEntryPos position )
1737+ {
1738+ char res = 'D' ;
1739+
1740+ switch (WEP_GETWEIGHT (position ))
1741+ {
1742+ case 3 :
1743+ return 'A' ;
1744+ case 2 :
1745+ return 'B' ;
1746+ case 1 :
1747+ return 'C' ;
1748+ }
1749+
1750+ return res ;
1751+ }