14
14
* 2) I/O functions were not available for all types in
15
15
* in the get_datum_text_by_oid() function.
16
16
*
17
- * 3) SIGSEGV in case of bytea output as additional information.
17
+ * 3) The output of lexeme positions in the high keys of the posting
18
+ * tree is not supported.
18
19
*/
19
20
20
21
#include "postgres.h"
@@ -115,8 +116,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115
116
static Datum category_get_datum_text (RumNullCategory category );
116
117
static Oid find_add_info_oid (RumState * rum_state_ptr );
117
118
static OffsetNumber find_add_info_atrr_num (RumState * rum_state_ptr );
118
-
119
119
static Datum get_positions_to_text_datum (Datum add_info );
120
+ static char pos_get_weight (WordEntryPos position );
120
121
121
122
/*
122
123
* The rum_metapage_info() function is used to retrieve
@@ -472,7 +473,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472
473
*/
473
474
if (fctx -> call_cntr <= inter_call_data -> maxoff )
474
475
{
475
- RumItem * high_key_ptr ;
476
+ RumItem * high_key_ptr ; /* to read high key from a page */
476
477
RumItem * rum_item_ptr ; /* to read data from a page */
477
478
Datum values [4 ]; /* return values */
478
479
bool nulls [4 ]; /* true if the corresponding value is NULL */
@@ -497,7 +498,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497
498
values [2 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
498
499
499
500
/* Returning add info */
500
- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
501
+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
501
502
&& inter_call_data -> add_info_oid != BYTEAOID )
502
503
{
503
504
values [3 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -506,12 +507,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506
507
507
508
/*
508
509
* In this case, we are dealing with the positions
509
- * of tokens and they need to be decoded.
510
+ * of lexemes and they need to be decoded.
510
511
*/
511
- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
512
+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
512
513
&& inter_call_data -> add_info_oid == BYTEAOID )
513
514
{
514
- /* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515
515
values [3 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
516
516
}
517
517
@@ -525,26 +525,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525
525
SRF_RETURN_NEXT (fctx , result );
526
526
}
527
527
528
- /*
529
- * Reading information from the page in rum_item.
530
- *
531
- * TODO: The fact is that being on the posting tree page, we don't know which
532
- * index attribute this posting tree was built for, so we don't know the
533
- * attribute number of the additional information. But the rumDataPageLeafRead()
534
- * function requires it to read information from the page. Here we use the auxiliary
535
- * function find_add_info_atr_num(), which simply iterates through the array with
536
- * attributes that are additional information and selects the attribute number for
537
- * which the additional information attribute is not NULL. This approach is incorrect
538
- * because there may not be additional information for the attribute on the page,
539
- * but we hope that in this case add_info_is_null will have the value true and the
540
- * additional information will not be read.
541
- *
542
- * This problem can be solved by asking the user for the attribute number of
543
- * additional information, because going through the index from top to bottom,
544
- * he saw it next to the link to the posting tree root.
545
- */
528
+ /* Reading information from the page in rum_item */
546
529
inter_call_data -> item_ptr = rumDataPageLeafRead (inter_call_data -> item_ptr ,
547
- /* inter_call_data->cur_tuple_key_attnum, */
548
530
find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549
531
rum_item_ptr , false, inter_call_data -> rum_state_ptr );
550
532
@@ -554,7 +536,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554
536
values [2 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555
537
556
538
/* Returning add info */
557
- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
539
+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
558
540
&& inter_call_data -> add_info_oid != BYTEAOID )
559
541
{
560
542
values [3 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo ,
@@ -563,9 +545,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563
545
564
546
/*
565
547
* In this case, we are dealing with the positions
566
- * of tokens and they need to be decoded.
548
+ * of lexemes and they need to be decoded.
567
549
*/
568
- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
550
+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
569
551
&& inter_call_data -> add_info_oid == BYTEAOID )
570
552
{
571
553
values [3 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -729,7 +711,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729
711
*/
730
712
if (fctx -> call_cntr <= inter_call_data -> maxoff )
731
713
{
732
- RumItem * high_key_ptr ;
714
+ RumItem * high_key_ptr ; /* to read high key from a page */
733
715
PostingItem * posting_item_ptr ; /* to read data from a page */
734
716
Datum values [5 ]; /* returned values */
735
717
bool nulls [5 ]; /* true if the corresponding returned value is NULL */
@@ -754,7 +736,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754
736
values [3 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
755
737
756
738
/* Returning add info */
757
- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
739
+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
758
740
&& inter_call_data -> add_info_oid != BYTEAOID )
759
741
{
760
742
values [4 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -763,12 +745,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763
745
764
746
/*
765
747
* In this case, we are dealing with the positions
766
- * of tokens and they need to be decoded.
748
+ * of lexemes and they need to be decoded.
767
749
*/
768
- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
750
+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
769
751
&& inter_call_data -> add_info_oid == BYTEAOID )
770
752
{
771
- /* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772
753
values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
773
754
}
774
755
@@ -793,7 +774,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793
774
values [3 ] = BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794
775
795
776
/* Returning add info */
796
- if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
777
+ if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
797
778
&& inter_call_data -> add_info_oid != BYTEAOID )
798
779
{
799
780
values [4 ] = get_datum_text_by_oid (posting_item_ptr -> item .addInfo ,
@@ -802,12 +783,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802
783
803
784
/*
804
785
* In this case, we are dealing with the positions
805
- * of tokens and they need to be decoded.
786
+ * of lexemes and they need to be decoded.
806
787
*/
807
- else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
788
+ else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
808
789
&& inter_call_data -> add_info_oid == BYTEAOID )
809
790
{
810
- /* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811
791
values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
812
792
}
813
793
@@ -1072,17 +1052,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
1072
1052
values [4 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
1073
1053
1074
1054
/* Returning add info */
1075
- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0 &&
1055
+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid &&
1076
1056
inter_call_data -> add_info_oid != BYTEAOID )
1077
1057
{
1078
1058
values [5 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo , inter_call_data -> add_info_oid );
1079
1059
}
1080
1060
1081
1061
/*
1082
1062
* In this case, we are dealing with the positions
1083
- * of tokens and they need to be decoded.
1063
+ * of lexemes and they need to be decoded.
1084
1064
*/
1085
- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
1065
+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
1086
1066
&& inter_call_data -> add_info_oid == BYTEAOID )
1087
1067
{
1088
1068
values [5 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -1427,22 +1407,16 @@ get_page_from_raw(bytea *raw_page)
1427
1407
* int2, int4, int8, float4, float8, money, oid, timestamp,
1428
1408
* timestamptz, time, timetz, date, interval, macaddr, inet,
1429
1409
* cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430
- *
1431
- * TODO: All types accepted by rum must be checked, but
1432
- * perhaps some types are missing or some are superfluous.
1433
1410
*/
1434
1411
static Datum
1435
1412
get_datum_text_by_oid (Datum info , Oid info_oid )
1436
1413
{
1437
1414
char * str_info = NULL ;
1438
1415
1439
- /* info cannot be NULL */
1440
- Assert (DatumGetPointer (info ) != NULL );
1441
-
1442
1416
/*
1443
1417
* Form a string depending on the type of info.
1444
1418
*
1445
- * FIXME : The macros used below are taken from the
1419
+ * TODO : The macros used below are taken from the
1446
1420
* pg_type_d.h file, and it says not to use them
1447
1421
* in the new code.
1448
1422
*/
@@ -1528,18 +1502,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
1528
1502
str_info = OidOutputFunctionCall (F_CHAROUT , info );
1529
1503
break ;
1530
1504
1531
- /*
1532
- * TODO: For some reason, the rum index created for a single tsv
1533
- * field contains additional information as bytea. In addition,
1534
- * if additional information in this format is extracted from
1535
- * posting tree pages, it cannot be displayed correctly as text.
1536
- * If the additional information was extracted from the entry
1537
- * tree pages, then it is displayed correctly.
1538
- */
1539
1505
case BYTEAOID :
1540
- /* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541
- /* break; */
1542
- return CStringGetTextDatum ("BYTEAOID is not supported" );
1506
+ str_info = OidOutputFunctionCall (F_BYTEAOUT , info );
1507
+ break ;
1543
1508
1544
1509
case BITOID :
1545
1510
str_info = OidOutputFunctionCall (F_BIT_OUT , info );
@@ -1634,14 +1599,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
1634
1599
* the Oid of additional information for an attribute for
1635
1600
* which it is not NULL.
1636
1601
*
1637
- * TODO: The logic of the function assumes that there cannot
1602
+ * The logic of the function assumes that there cannot
1638
1603
* be several types of additional information in the index,
1639
1604
* otherwise it will not work.
1640
1605
*/
1641
1606
static Oid
1642
1607
find_add_info_oid (RumState * rum_state_ptr )
1643
1608
{
1644
- Oid add_info_oid = 0 ;
1609
+ Oid add_info_oid = InvalidOid ;
1645
1610
1646
1611
/* Number of index attributes */
1647
1612
int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1616,13 @@ find_add_info_oid(RumState *rum_state_ptr)
1651
1616
* oid of additional information.
1652
1617
*/
1653
1618
for (int i = 0 ; i < num_attrs ; i ++ )
1619
+ {
1654
1620
if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1621
+ {
1622
+ Assert (add_info_oid == InvalidOid );
1655
1623
add_info_oid = ((rum_state_ptr -> addAttrs )[i ])-> atttypid ;
1624
+ }
1625
+ }
1656
1626
1657
1627
return add_info_oid ;
1658
1628
}
@@ -1661,19 +1631,28 @@ find_add_info_oid(RumState *rum_state_ptr)
1661
1631
* This is an auxiliary function to get the attribute number
1662
1632
* for additional information. It is used in the rum_leaf_data_page_items()
1663
1633
* function to call the rumDataPageLeafRead() function.
1634
+ *
1635
+ * The logic of the function assumes that there cannot
1636
+ * be several types of additional information in the index,
1637
+ * otherwise it will not work.
1664
1638
*/
1665
1639
static OffsetNumber
1666
1640
find_add_info_atrr_num (RumState * rum_state_ptr )
1667
1641
{
1668
- OffsetNumber add_info_attr_num = 0 ;
1642
+ OffsetNumber add_info_attr_num = InvalidOffsetNumber ;
1669
1643
1670
1644
/* Number of index attributes */
1671
1645
int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
1672
1646
1673
1647
/* Go through the addAttrs array */
1674
- for (int i = 0 ; i < num_attrs ; i ++ )
1648
+ for (int i = 0 ; i < num_attrs ; i ++ )
1649
+ {
1675
1650
if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1651
+ {
1652
+ Assert (add_info_attr_num == InvalidOffsetNumber );
1676
1653
add_info_attr_num = i ;
1654
+ }
1655
+ }
1677
1656
1678
1657
/* Need to add 1 because the attributes are numbered from 1 */
1679
1658
return add_info_attr_num + 1 ;
@@ -1683,8 +1662,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
1683
1662
#define POS_MAX_VAL_LENGHT 6
1684
1663
1685
1664
/*
1686
- * A function for extracting the positions of tokens from additional
1687
- * information. Returns a string in which the positions of the tokens
1665
+ * A function for extracting the positions of lexemes from additional
1666
+ * information. Returns a string in which the positions of the lexemes
1688
1667
* are recorded. The memory that the string occupies must be cleared later.
1689
1668
*/
1690
1669
static Datum
@@ -1711,14 +1690,17 @@ get_positions_to_text_datum(Datum add_info)
1711
1690
cur_max_str_lenght = POS_STR_BUF_LENGHT ;
1712
1691
positions_str_cur_ptr = positions_str ;
1713
1692
1714
- /* Extract the positions of the tokens and put them in the string */
1693
+ /* Extract the positions of the lexemes and put them in the string */
1715
1694
for (int i = 0 ; i < npos ; i ++ )
1716
1695
{
1717
1696
/* At each iteration decode the position */
1718
1697
ptrt = decompress_pos (ptrt , & position );
1719
1698
1720
- /* Write this position in the string */
1721
- sprintf (positions_str_cur_ptr , "%d," , position );
1699
+ /* Write this position and weight in the string */
1700
+ if (pos_get_weight (position ) == 'D' )
1701
+ sprintf (positions_str_cur_ptr , "%d," , WEP_GETPOS (position ));
1702
+ else
1703
+ sprintf (positions_str_cur_ptr , "%d%c," , WEP_GETPOS (position ), pos_get_weight (position ));
1722
1704
1723
1705
/* Moving the pointer forward */
1724
1706
positions_str_cur_ptr += strlen (positions_str_cur_ptr );
@@ -1744,3 +1726,25 @@ get_positions_to_text_datum(Datum add_info)
1744
1726
pfree (positions_str );
1745
1727
return res ;
1746
1728
}
1729
+
1730
+ /*
1731
+ * The function extracts the weight and
1732
+ * returns the corresponding letter.
1733
+ */
1734
+ static char
1735
+ pos_get_weight (WordEntryPos position )
1736
+ {
1737
+ char res = 'D' ;
1738
+
1739
+ switch (WEP_GETWEIGHT (position ))
1740
+ {
1741
+ case 3 :
1742
+ return 'A' ;
1743
+ case 2 :
1744
+ return 'B' ;
1745
+ case 1 :
1746
+ return 'C' ;
1747
+ }
1748
+
1749
+ return res ;
1750
+ }
0 commit comments