1414 * 2) I/O functions were not available for all types
1515 * in the get_datum_text_by_oid() function.
1616 *
17- * 3) SIGSEGV in case of bytea output as additional information.
17+ * 3) The output of lexeme positions in the high keys of the posting
18+ * tree is not supported.
1819 */
1920
2021#include "postgres.h"
@@ -115,8 +116,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115116static Datum category_get_datum_text (RumNullCategory category );
116117static Oid find_add_info_oid (RumState * rum_state_ptr );
117118static OffsetNumber find_add_info_atrr_num (RumState * rum_state_ptr );
118-
119119static Datum get_positions_to_text_datum (Datum add_info );
120+ static char pos_get_weight (WordEntryPos position );
120121
121122/*
122123 * The rum_metapage_info() function is used to retrieve
@@ -472,7 +473,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472473 */
473474if (fctx -> call_cntr <=inter_call_data -> maxoff )
474475{
475- RumItem * high_key_ptr ;
476+ RumItem * high_key_ptr ;/* to read high key from a page */
476477RumItem * rum_item_ptr ;/* to read data from a page */
477478Datum values [4 ];/* return values */
478479bool nulls [4 ];/* true if the corresponding value is NULL */
@@ -497,7 +498,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497498values [2 ]= BoolGetDatum (high_key_ptr -> addInfoIsNull );
498499
499500/* Returning add info */
500- if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
501+ if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
501502&& inter_call_data -> add_info_oid != BYTEAOID )
502503{
503504values [3 ]= get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -506,12 +507,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506507
507508/*
508509 * In this case, we are dealing with the positions
509- * oftokens and they need to be decoded.
510+ * of lexemes and they need to be decoded.
510511 */
511- else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
512+ else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
512513&& inter_call_data -> add_info_oid == BYTEAOID )
513514{
514- /* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515515values [3 ]= CStringGetTextDatum ("high key positions in posting tree is not supported" );
516516}
517517
@@ -525,26 +525,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525525SRF_RETURN_NEXT (fctx ,result );
526526}
527527
528- /*
529- * Reading information from the page in rum_item.
530- *
531- * TODO: The fact is that being on the posting tree page, we don't know which
532- * index attribute this posting tree was built for, so we don't know the
533- * attribute number of the additional information. But the rumDataPageLeafRead()
534- * function requires it to read information from the page. Here we use the auxiliary
535- * function find_add_info_atr_num(), which simply iterates through the array with
536- * attributes that are additional information and selects the attribute number for
537- * which the additional information attribute is not NULL. This approach is incorrect
538- * because there may not be additional information for the attribute on the page,
539- * but we hope that in this case add_info_is_null will have the value true and the
540- * additional information will not be read.
541- *
542- * This problem can be solved by asking the user for the attribute number of
543- * additional information, because going through the index from top to bottom,
544- * he saw it next to the link to the posting tree root.
545- */
528+ /* Reading information from the page in rum_item */
546529inter_call_data -> item_ptr = rumDataPageLeafRead (inter_call_data -> item_ptr ,
547- /* inter_call_data->cur_tuple_key_attnum, */
548530find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549531rum_item_ptr , false,inter_call_data -> rum_state_ptr );
550532
@@ -554,7 +536,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554536values [2 ]= BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555537
556538/* Returning add info */
557- if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
539+ if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
558540&& inter_call_data -> add_info_oid != BYTEAOID )
559541{
560542values [3 ]= get_datum_text_by_oid (rum_item_ptr -> addInfo ,
@@ -563,9 +545,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563545
564546/*
565547 * In this case, we are dealing with the positions
566- * oftokens and they need to be decoded.
548+ * of lexemes and they need to be decoded.
567549 */
568- else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
550+ else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
569551&& inter_call_data -> add_info_oid == BYTEAOID )
570552{
571553values [3 ]= get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -729,7 +711,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729711 */
730712if (fctx -> call_cntr <=inter_call_data -> maxoff )
731713{
732- RumItem * high_key_ptr ;
714+ RumItem * high_key_ptr ;/* to read high key from a page */
733715PostingItem * posting_item_ptr ;/* to read data from a page */
734716Datum values [5 ];/* returned values */
735717bool nulls [5 ];/* true if the corresponding returned value is NULL */
@@ -754,7 +736,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754736values [3 ]= BoolGetDatum (high_key_ptr -> addInfoIsNull );
755737
756738/* Returning add info */
757- if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
739+ if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
758740&& inter_call_data -> add_info_oid != BYTEAOID )
759741{
760742values [4 ]= get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -763,12 +745,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763745
764746/*
765747 * In this case, we are dealing with the positions
766- * oftokens and they need to be decoded.
748+ * of lexemes and they need to be decoded.
767749 */
768- else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
750+ else if (!(high_key_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
769751&& inter_call_data -> add_info_oid == BYTEAOID )
770752{
771- /* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772753values [4 ]= CStringGetTextDatum ("high key positions in posting tree is not supported" );
773754}
774755
@@ -793,7 +774,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793774values [3 ]= BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794775
795776/* Returning add info */
796- if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
777+ if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
797778&& inter_call_data -> add_info_oid != BYTEAOID )
798779{
799780values [4 ]= get_datum_text_by_oid (posting_item_ptr -> item .addInfo ,
@@ -802,12 +783,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802783
803784/*
804785 * In this case, we are dealing with the positions
805- * oftokens and they need to be decoded.
786+ * of lexemes and they need to be decoded.
806787 */
807- else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
788+ else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
808789&& inter_call_data -> add_info_oid == BYTEAOID )
809790{
810- /* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811791values [4 ]= CStringGetTextDatum ("high key positions in posting tree is not supported" );
812792}
813793
@@ -1072,17 +1052,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721052values [4 ]= BoolGetDatum (rum_item_ptr -> addInfoIsNull );
10731053
10741054/* Returning add info */
1075- if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0 &&
1055+ if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid &&
10761056inter_call_data -> add_info_oid != BYTEAOID )
10771057{
10781058values [5 ]= get_datum_text_by_oid (rum_item_ptr -> addInfo ,inter_call_data -> add_info_oid );
10791059}
10801060
10811061/*
10821062 * In this case, we are dealing with the positions
1083- * oftokens and they need to be decoded.
1063+ * of lexemes and they need to be decoded.
10841064 */
1085- else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != 0
1065+ else if (!(rum_item_ptr -> addInfoIsNull )&& inter_call_data -> add_info_oid != InvalidOid
10861066&& inter_call_data -> add_info_oid == BYTEAOID )
10871067{
10881068values [5 ]= get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -1427,22 +1407,16 @@ get_page_from_raw(bytea *raw_page)
14271407 * int2, int4, int8, float4, float8, money, oid, timestamp,
14281408 * timestamptz, time, timetz, date, interval, macaddr, inet,
14291409 * cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430- *
1431- * TODO: All types accepted by rum must be checked, but
1432- * perhaps some types are missing or some are superfluous.
14331410 */
14341411static Datum
14351412get_datum_text_by_oid (Datum info ,Oid info_oid )
14361413{
14371414char * str_info = NULL ;
14381415
1439- /* info cannot be NULL */
1440- Assert (DatumGetPointer (info )!= NULL );
1441-
14421416/*
14431417 * Form a string depending on the type of info.
14441418 *
1445- *FIXME : The macros used below are taken from the
1419+ * TODO: The macros used below are taken from the
14461420 * pg_type_d.h file, and it says not to use them
14471421 * in the new code.
14481422 */
@@ -1528,18 +1502,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281502str_info = OidOutputFunctionCall (F_CHAROUT ,info );
15291503break ;
15301504
1531- /*
1532- * TODO: For some reason, the rum index created for a single tsv
1533- * field contains additional information as bytea. In addition,
1534- * if additional information in this format is extracted from
1535- * posting tree pages, it cannot be displayed correctly as text.
1536- * If the additional information was extracted from the entry
1537- * tree pages, then it is displayed correctly.
1538- */
15391505case BYTEAOID :
1540- /* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541- /* break; */
1542- return CStringGetTextDatum ("BYTEAOID is not supported" );
1506+ str_info = OidOutputFunctionCall (F_BYTEAOUT ,info );
1507+ break ;
15431508
15441509case BITOID :
15451510str_info = OidOutputFunctionCall (F_BIT_OUT ,info );
@@ -1634,14 +1599,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341599 * the Oid of additional information for an attribute for
16351600 * which it is not NULL.
16361601 *
1637- *TODO: The logic of the function assumes that there cannot
1602+ * The logic of the function assumes that there cannot
16381603 * be several types of additional information in the index,
16391604 * otherwise it will not work.
16401605 */
16411606static Oid
16421607find_add_info_oid (RumState * rum_state_ptr )
16431608{
1644- Oid add_info_oid = 0 ;
1609+ Oid add_info_oid = InvalidOid ;
16451610
16461611/* Number of index attributes */
16471612int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1616,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511616 * oid of additional information.
16521617 */
16531618for (int i = 0 ;i < num_attrs ;i ++ )
1619+ {
16541620if ((rum_state_ptr -> addAttrs )[i ]!= NULL )
1621+ {
1622+ Assert (add_info_oid == InvalidOid );
16551623add_info_oid = ((rum_state_ptr -> addAttrs )[i ])-> atttypid ;
1624+ }
1625+ }
16561626
16571627return add_info_oid ;
16581628}
@@ -1661,19 +1631,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611631 * This is an auxiliary function to get the attribute number
16621632 * for additional information. It is used in the rum_leaf_data_page_items()
16631633 * function to call the rumDataPageLeafRead() function.
1634+ *
1635+ * The logic of the function assumes that there cannot
1636+ * be several types of additional information in the index,
1637+ * otherwise it will not work.
16641638 */
16651639static OffsetNumber
16661640find_add_info_atrr_num (RumState * rum_state_ptr )
16671641{
1668- OffsetNumber add_info_attr_num = 0 ;
1642+ OffsetNumber add_info_attr_num = InvalidOffsetNumber ;
16691643
16701644/* Number of index attributes */
16711645int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
16721646
16731647/* Go through the addAttrs array */
1674- for (int i = 0 ;i < num_attrs ;i ++ )
1648+ for (int i = 0 ;i < num_attrs ;i ++ )
1649+ {
16751650if ((rum_state_ptr -> addAttrs )[i ]!= NULL )
1651+ {
1652+ Assert (add_info_attr_num == InvalidOffsetNumber );
16761653add_info_attr_num = i ;
1654+ }
1655+ }
16771656
16781657/* Need to add 1 because the attributes are numbered from 1 */
16791658return add_info_attr_num + 1 ;
@@ -1683,8 +1662,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831662#define POS_MAX_VAL_LENGHT 6
16841663
16851664/*
1686- * A function for extracting the positions oftokens from additional
1687- * information. Returns a string in which the positions of thetokens
1665+ * A function for extracting the positions of lexemes from additional
1666+ * information. Returns a string in which the positions of the lexemes
16881667 * are recorded. The memory that the string occupies must be cleared later.
16891668 */
16901669static Datum
@@ -1711,14 +1690,17 @@ get_positions_to_text_datum(Datum add_info)
17111690cur_max_str_lenght = POS_STR_BUF_LENGHT ;
17121691positions_str_cur_ptr = positions_str ;
17131692
1714- /* Extract the positions of thetokens and put them in the string */
1693+ /* Extract the positions of the lexemes and put them in the string */
17151694for (int i = 0 ;i < npos ;i ++ )
17161695{
17171696/* At each iteration decode the position */
17181697ptrt = decompress_pos (ptrt ,& position );
17191698
1720- /* Write this position in the string */
1721- sprintf (positions_str_cur_ptr ,"%d," ,position );
1699+ /* Write this position and weight in the string */
1700+ if (pos_get_weight (position )== 'D' )
1701+ sprintf (positions_str_cur_ptr ,"%d," ,WEP_GETPOS (position ));
1702+ else
1703+ sprintf (positions_str_cur_ptr ,"%d%c," ,WEP_GETPOS (position ),pos_get_weight (position ));
17221704
17231705/* Moving the pointer forward */
17241706positions_str_cur_ptr += strlen (positions_str_cur_ptr );
@@ -1744,3 +1726,25 @@ get_positions_to_text_datum(Datum add_info)
17441726pfree (positions_str );
17451727return res ;
17461728}
1729+
1730+ /*
1731+ * The function extracts the weight and
1732+ * returns the corresponding letter.
1733+ */
1734+ static char
1735+ pos_get_weight (WordEntryPos position )
1736+ {
1737+ char res = 'D' ;
1738+
1739+ switch (WEP_GETWEIGHT (position ))
1740+ {
1741+ case 3 :
1742+ return 'A' ;
1743+ case 2 :
1744+ return 'B' ;
1745+ case 1 :
1746+ return 'C' ;
1747+ }
1748+
1749+ return res ;
1750+ }