Skip to content

Commit be50834

Browse files
author
Arseny Kositsyn
committed
[PGPRO-12159] Added the output of weights.
If the index is created with the appropriate operator class, then in addition to the positions of the lexemes, their weights (A, B, C, D) are also stored in the additional information. Output of these weights has been added. In addition, Asserts have been added to the find_add_info_atrr_num() and find_add_info_oid() functions; they check that the index contains at most one type of additional information. Tags: rum
1 parent 348104a commit be50834

File tree

1 file changed

+72
-68
lines changed

1 file changed

+72
-68
lines changed

src/rum_debug_funcs.c

+72-68
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
* 2) I/O functions were not available for all types in
1515
* the get_datum_text_by_oid() function.
1616
*
17-
* 3) SIGSEGV in case of bytea output as additional information.
17+
* 3) The output of lexeme positions in the high keys of the posting
18+
* tree is not supported.
1819
*/
1920

2021
#include "postgres.h"
@@ -115,8 +116,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115116
static Datum category_get_datum_text(RumNullCategory category);
116117
static Oid find_add_info_oid(RumState *rum_state_ptr);
117118
static OffsetNumber find_add_info_atrr_num(RumState *rum_state_ptr);
118-
119119
static Datum get_positions_to_text_datum(Datum add_info);
120+
static char pos_get_weight(WordEntryPos position);
120121

121122
/*
122123
* The rum_metapage_info() function is used to retrieve
@@ -472,7 +473,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472473
*/
473474
if(fctx->call_cntr <= inter_call_data->maxoff)
474475
{
475-
RumItem *high_key_ptr;
476+
RumItem *high_key_ptr; /* to read high key from a page */
476477
RumItem *rum_item_ptr; /* to read data from a page */
477478
Datum values[4]; /* return values */
478479
bool nulls[4]; /* true if the corresponding value is NULL */
@@ -497,7 +498,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497498
values[2] = BoolGetDatum(high_key_ptr->addInfoIsNull);
498499

499500
/* Returning add info */
500-
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
501+
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
501502
&& inter_call_data->add_info_oid != BYTEAOID)
502503
{
503504
values[3] = get_datum_text_by_oid(high_key_ptr->addInfo,
@@ -506,12 +507,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506507

507508
/*
508509
* In this case, we are dealing with the positions
509-
* of tokens and they need to be decoded.
510+
* of lexemes and they need to be decoded.
510511
*/
511-
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
512+
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
512513
&& inter_call_data->add_info_oid == BYTEAOID)
513514
{
514-
/* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515515
values[3] = CStringGetTextDatum("high key positions in posting tree is not supported");
516516
}
517517

@@ -525,26 +525,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525525
SRF_RETURN_NEXT(fctx, result);
526526
}
527527

528-
/*
529-
* Reading information from the page in rum_item.
530-
*
531-
* TODO: The fact is that being on the posting tree page, we don't know which
532-
* index attribute this posting tree was built for, so we don't know the
533-
* attribute number of the additional information. But the rumDataPageLeafRead()
534-
* function requires it to read information from the page. Here we use the auxiliary
535-
* function find_add_info_atr_num(), which simply iterates through the array with
536-
* attributes that are additional information and selects the attribute number for
537-
* which the additional information attribute is not NULL. This approach is incorrect
538-
* because there may not be additional information for the attribute on the page,
539-
* but we hope that in this case add_info_is_null will have the value true and the
540-
* additional information will not be read.
541-
*
542-
* This problem can be solved by asking the user for the attribute number of
543-
* additional information, because going through the index from top to bottom,
544-
* he saw it next to the link to the posting tree root.
545-
*/
528+
/* Reading information from the page into rum_item */
546529
inter_call_data->item_ptr = rumDataPageLeafRead(inter_call_data->item_ptr,
547-
/* inter_call_data->cur_tuple_key_attnum, */
548530
find_add_info_atrr_num(inter_call_data->rum_state_ptr),
549531
rum_item_ptr, false, inter_call_data->rum_state_ptr);
550532

@@ -554,7 +536,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554536
values[2] = BoolGetDatum(rum_item_ptr->addInfoIsNull);
555537

556538
/* Returning add info */
557-
if(!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
539+
if(!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
558540
&& inter_call_data->add_info_oid != BYTEAOID)
559541
{
560542
values[3] = get_datum_text_by_oid(rum_item_ptr->addInfo,
@@ -563,9 +545,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563545

564546
/*
565547
* In this case, we are dealing with the positions
566-
* of tokens and they need to be decoded.
548+
* of lexemes and they need to be decoded.
567549
*/
568-
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
550+
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
569551
&& inter_call_data->add_info_oid == BYTEAOID)
570552
{
571553
values[3] = get_positions_to_text_datum(rum_item_ptr->addInfo);
@@ -729,7 +711,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729711
*/
730712
if(fctx->call_cntr <= inter_call_data->maxoff)
731713
{
732-
RumItem *high_key_ptr;
714+
RumItem *high_key_ptr; /* to read high key from a page */
733715
PostingItem *posting_item_ptr; /* to read data from a page */
734716
Datum values[5]; /* returned values */
735717
bool nulls[5]; /* true if the corresponding returned value is NULL */
@@ -754,7 +736,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754736
values[3] = BoolGetDatum(high_key_ptr->addInfoIsNull);
755737

756738
/* Returning add info */
757-
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
739+
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
758740
&& inter_call_data->add_info_oid != BYTEAOID)
759741
{
760742
values[4] = get_datum_text_by_oid(high_key_ptr->addInfo,
@@ -763,12 +745,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763745

764746
/*
765747
* In this case, we are dealing with the positions
766-
* of tokens and they need to be decoded.
748+
* of lexemes and they need to be decoded.
767749
*/
768-
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
750+
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
769751
&& inter_call_data->add_info_oid == BYTEAOID)
770752
{
771-
/* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772753
values[4] = CStringGetTextDatum("high key positions in posting tree is not supported");
773754
}
774755

@@ -793,7 +774,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793774
values[3] = BoolGetDatum(posting_item_ptr->item.addInfoIsNull);
794775

795776
/* Returning add info */
796-
if(!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != 0
777+
if(!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != InvalidOid
797778
&& inter_call_data->add_info_oid != BYTEAOID)
798779
{
799780
values[4] = get_datum_text_by_oid(posting_item_ptr->item.addInfo,
@@ -802,12 +783,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802783

803784
/*
804785
* In this case, we are dealing with the positions
805-
* of tokens and they need to be decoded.
786+
* of lexemes and they need to be decoded.
806787
*/
807-
else if (!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != 0
788+
else if (!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != InvalidOid
808789
&& inter_call_data->add_info_oid == BYTEAOID)
809790
{
810-
/* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811791
values[4] = CStringGetTextDatum("high key positions in posting tree is not supported");
812792
}
813793

@@ -1072,17 +1052,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721052
values[4] = BoolGetDatum(rum_item_ptr->addInfoIsNull);
10731053

10741054
/* Returning add info */
1075-
if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0 &&
1055+
if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid &&
10761056
inter_call_data->add_info_oid != BYTEAOID)
10771057
{
10781058
values[5] = get_datum_text_by_oid(rum_item_ptr->addInfo, inter_call_data->add_info_oid);
10791059
}
10801060

10811061
/*
10821062
* In this case, we are dealing with the positions
1083-
* of tokens and they need to be decoded.
1063+
* of lexemes and they need to be decoded.
10841064
*/
1085-
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
1065+
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
10861066
&& inter_call_data->add_info_oid == BYTEAOID)
10871067
{
10881068
values[5] = get_positions_to_text_datum(rum_item_ptr->addInfo);
@@ -1427,22 +1407,16 @@ get_page_from_raw(bytea *raw_page)
14271407
* int2, int4, int8, float4, float8, money, oid, timestamp,
14281408
* timestamptz, time, timetz, date, interval, macaddr, inet,
14291409
* cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430-
*
1431-
* TODO: All types accepted by rum must be checked, but
1432-
* perhaps some types are missing or some are superfluous.
14331410
*/
14341411
static Datum
14351412
get_datum_text_by_oid(Datum info, Oid info_oid)
14361413
{
14371414
char *str_info = NULL;
14381415

1439-
/* info cannot be NULL */
1440-
Assert(DatumGetPointer(info) != NULL);
1441-
14421416
/*
14431417
* Form a string depending on the type of info.
14441418
*
1445-
* FIXME: The macros used below are taken from the
1419+
* TODO: The macros used below are taken from the
14461420
* pg_type_d.h file, and it says not to use them
14471421
* in the new code.
14481422
*/
@@ -1528,18 +1502,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281502
str_info = OidOutputFunctionCall(F_CHAROUT, info);
15291503
break;
15301504

1531-
/*
1532-
* TODO: For some reason, the rum index created for a single tsv
1533-
* field contains additional information as bytea. In addition,
1534-
* if additional information in this format is extracted from
1535-
* posting tree pages, it cannot be displayed correctly as text.
1536-
* If the additional information was extracted from the entry
1537-
* tree pages, then it is displayed correctly.
1538-
*/
15391505
case BYTEAOID:
1540-
/* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541-
/* break; */
1542-
return CStringGetTextDatum("BYTEAOID is not supported");
1506+
str_info = OidOutputFunctionCall(F_BYTEAOUT, info);
1507+
break;
15431508

15441509
case BITOID:
15451510
str_info = OidOutputFunctionCall(F_BIT_OUT, info);
@@ -1634,14 +1599,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341599
* the Oid of additional information for an attribute for
16351600
* which it is not NULL.
16361601
*
1637-
* TODO: The logic of the function assumes that there cannot
1602+
* The logic of the function assumes that there cannot
16381603
* be several types of additional information in the index,
16391604
* otherwise it will not work.
16401605
*/
16411606
static Oid
16421607
find_add_info_oid(RumState *rum_state_ptr)
16431608
{
1644-
Oid add_info_oid = 0;
1609+
Oid add_info_oid = InvalidOid;
16451610

16461611
/* Number of index attributes */
16471612
int num_attrs = rum_state_ptr->origTupdesc->natts;
@@ -1651,8 +1616,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511616
* oid of additional information.
16521617
*/
16531618
for (int i = 0; i < num_attrs; i++)
1619+
{
16541620
if ((rum_state_ptr->addAttrs)[i] != NULL)
1621+
{
1622+
Assert(add_info_oid == InvalidOid);
16551623
add_info_oid = ((rum_state_ptr->addAttrs)[i])->atttypid;
1624+
}
1625+
}
16561626

16571627
return add_info_oid;
16581628
}
@@ -1661,19 +1631,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611631
* This is an auxiliary function to get the attribute number
16621632
* for additional information. It is used in the rum_leaf_data_page_items()
16631633
* function to call the rumDataPageLeafRead() function.
1634+
*
1635+
* The logic of the function assumes that there cannot
1636+
* be several types of additional information in the index,
1637+
* otherwise it will not work.
16641638
*/
16651639
static OffsetNumber
16661640
find_add_info_atrr_num(RumState *rum_state_ptr)
16671641
{
1668-
OffsetNumber add_info_attr_num = 0;
1642+
OffsetNumber add_info_attr_num = InvalidOffsetNumber;
16691643

16701644
/* Number of index attributes */
16711645
int num_attrs = rum_state_ptr->origTupdesc->natts;
16721646

16731647
/* Go through the addAttrs array */
1674-
for (int i = 0; i < num_attrs; i++)
1648+
for (int i = 0; i < num_attrs; i++)
1649+
{
16751650
if ((rum_state_ptr->addAttrs)[i] != NULL)
1651+
{
1652+
Assert(add_info_attr_num == InvalidOffsetNumber);
16761653
add_info_attr_num = i;
1654+
}
1655+
}
16771656

16781657
/* Need to add 1 because the attributes are numbered from 1 */
16791658
return add_info_attr_num + 1;
@@ -1683,8 +1662,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831662
#define POS_MAX_VAL_LENGHT 6
16841663

16851664
/*
1686-
* A function for extracting the positions of tokens from additional
1687-
* information. Returns a string in which the positions of the tokens
1665+
* A function for extracting the positions of lexemes from additional
1666+
* information. Returns a string in which the positions of the lexemes
16881667
* are recorded. The memory that the string occupies must be cleared later.
16891668
*/
16901669
static Datum
@@ -1711,14 +1690,17 @@ get_positions_to_text_datum(Datum add_info)
17111690
cur_max_str_lenght = POS_STR_BUF_LENGHT;
17121691
positions_str_cur_ptr = positions_str;
17131692

1714-
/* Extract the positions of the tokens and put them in the string */
1693+
/* Extract the positions of the lexemes and put them in the string */
17151694
for (int i = 0; i < npos; i++)
17161695
{
17171696
/* At each iteration decode the position */
17181697
ptrt = decompress_pos(ptrt, &position);
17191698

1720-
/* Write this position in the string */
1721-
sprintf(positions_str_cur_ptr, "%d,", position);
1699+
/* Write this position and weight in the string */
1700+
if(pos_get_weight(position) == 'D')
1701+
sprintf(positions_str_cur_ptr, "%d,", WEP_GETPOS(position));
1702+
else
1703+
sprintf(positions_str_cur_ptr, "%d%c,", WEP_GETPOS(position), pos_get_weight(position));
17221704

17231705
/* Moving the pointer forward */
17241706
positions_str_cur_ptr += strlen(positions_str_cur_ptr);
@@ -1744,3 +1726,25 @@ get_positions_to_text_datum(Datum add_info)
17441726
pfree(positions_str);
17451727
return res;
17461728
}
1729+
1730+
/*
1731+
* The function extracts the weight and
1732+
* returns the corresponding letter.
1733+
*/
1734+
static char
1735+
pos_get_weight(WordEntryPos position)
1736+
{
1737+
char res = 'D';
1738+
1739+
switch(WEP_GETWEIGHT(position))
1740+
{
1741+
case 3:
1742+
return 'A';
1743+
case 2:
1744+
return 'B';
1745+
case 1:
1746+
return 'C';
1747+
}
1748+
1749+
return res;
1750+
}

0 commit comments

Comments
 (0)