12
12
#include "postgres.h"
13
13
14
14
#include "catalog/pg_type.h"
15
+ #include "tsearch/ts_type.h"
15
16
#include "tsearch/ts_utils.h"
16
17
#include "utils/array.h"
17
18
#include "utils/builtins.h"
@@ -27,23 +28,11 @@ PG_FUNCTION_INFO_V1(rum_tsquery_pre_consistent);
27
28
PG_FUNCTION_INFO_V1 (rum_tsquery_distance );
28
29
PG_FUNCTION_INFO_V1 (rum_ts_distance );
29
30
30
/*
 * Bit flags for the 'method' argument of calc_rank().  They may be OR-ed
 * together; each set bit applies one more normalization step to the rank.
 */
#define RANK_NO_NORM			0x00	/* no normalization */
#define RANK_NORM_LOGLENGTH		0x01	/* divide by log2(cnt_length(t) + 1) */
#define RANK_NORM_LENGTH		0x02	/* divide by cnt_length(t) */
#define RANK_NORM_EXTDIST		0x04	/* not applicable in calc_rank() */
#define RANK_NORM_UNIQ			0x08	/* divide by t->size */
#define RANK_NORM_LOGUNIQ		0x10	/* divide by log2(t->size + 1) */
#define RANK_NORM_RDIVRPLUS1	0x20	/* replace rank by rank / (rank + 1) */
#define DEF_NORM_METHOD			RANK_NO_NORM
38
-
39
31
static float calc_rank_pos_and (float * w ,Datum * addInfo ,bool * addInfoIsNull ,
40
32
int size );
41
33
static float calc_rank_pos_or (float * w ,Datum * addInfo ,bool * addInfoIsNull ,
42
34
int size );
43
35
44
- static float calc_rank_or (const float * w ,TSVector t ,TSQuery q );
45
- static float calc_rank_and (const float * w ,TSVector t ,TSQuery q );
46
-
47
36
typedef struct
48
37
{
49
38
QueryItem * first_item ;
@@ -127,82 +116,10 @@ word_distance(int32 w)
127
116
return 1.0 / (1.005 + 0.05 * exp (((float4 )w ) /1.5 - 2 ));
128
117
}
129
118
130
- static int
131
- cnt_length (TSVector t )
132
- {
133
- WordEntry * ptr = ARRPTR (t ),
134
- * end = (WordEntry * )STRPTR (t );
135
- int len = 0 ;
136
-
137
- while (ptr < end )
138
- {
139
- int clen = POSDATALEN (t ,ptr );
140
-
141
- if (clen == 0 )
142
- len += 1 ;
143
- else
144
- len += clen ;
145
-
146
- ptr ++ ;
147
- }
148
-
149
- return len ;
150
- }
151
-
152
119
/*
 * Compare query operand 'i' with tsvector entry 'p' via tsCompareString().
 *
 * 'q' is the base of the operand text ('i' addresses it by distance/length),
 * 'e' is the base of the entry text ('p' addresses it by pos/len), and 'm'
 * is passed through as tsCompareString()'s last argument (find_wordentry()
 * passes true when it wants prefix matching).
 */
#define WordECompareQueryItem(e,q,p,i,m) \
	tsCompareString((q) + (i)->distance, (i)->length, \
					(e) + (p)->pos, (p)->len, (m))
155
122
156
- /*
157
- * Returns a pointer to a WordEntry's array corresponding to 'item' from
158
- * tsvector 't'. 'q' is the TSQuery containing 'item'.
159
- * Returns NULL if not found.
160
- */
161
- static WordEntry *
162
- find_wordentry (TSVector t ,TSQuery q ,QueryOperand * item ,int32 * nitem )
163
- {
164
- WordEntry * StopLow = ARRPTR (t );
165
- WordEntry * StopHigh = (WordEntry * )STRPTR (t );
166
- WordEntry * StopMiddle = StopHigh ;
167
- int difference ;
168
-
169
- * nitem = 0 ;
170
-
171
- /* Loop invariant: StopLow <= item < StopHigh */
172
- while (StopLow < StopHigh )
173
- {
174
- StopMiddle = StopLow + (StopHigh - StopLow ) /2 ;
175
- difference = WordECompareQueryItem (STRPTR (t ),GETOPERAND (q ),StopMiddle ,item , false);
176
- if (difference == 0 )
177
- {
178
- StopHigh = StopMiddle ;
179
- * nitem = 1 ;
180
- break ;
181
- }
182
- else if (difference > 0 )
183
- StopLow = StopMiddle + 1 ;
184
- else
185
- StopHigh = StopMiddle ;
186
- }
187
-
188
- if (item -> prefix )
189
- {
190
- if (StopLow >=StopHigh )
191
- StopMiddle = StopHigh ;
192
-
193
- * nitem = 0 ;
194
-
195
- while (StopMiddle < (WordEntry * )STRPTR (t )&&
196
- WordECompareQueryItem (STRPTR (t ),GETOPERAND (q ),StopMiddle ,item , true)== 0 )
197
- {
198
- (* nitem )++ ;
199
- StopMiddle ++ ;
200
- }
201
- }
202
-
203
- return (* nitem > 0 ) ?StopHigh :NULL ;
204
- }
205
-
206
123
static int
207
124
compress_pos (char * target ,uint16 * pos ,int npos )
208
125
{
@@ -487,206 +404,6 @@ SortAndUniqItems(TSQuery q, int *size)
487
404
return res ;
488
405
}
489
406
490
- static float
491
- calc_rank_and (const float * w ,TSVector t ,TSQuery q )
492
- {
493
- WordEntryPosVector * * pos ;
494
- WordEntryPosVector1 posnull ;
495
- WordEntryPosVector * POSNULL ;
496
- int i ,
497
- k ,
498
- l ,
499
- p ;
500
- WordEntry * entry ,
501
- * firstentry ;
502
- WordEntryPos * post ,
503
- * ct ;
504
- int32 dimt ,
505
- lenct ,
506
- dist ,
507
- nitem ;
508
- float res = -1.0 ;
509
- QueryOperand * * item ;
510
- int size = q -> size ;
511
-
512
- item = SortAndUniqItems (q ,& size );
513
- if (size < 2 )
514
- {
515
- pfree (item );
516
- return calc_rank_or (w ,t ,q );
517
- }
518
- pos = (WordEntryPosVector * * )palloc0 (sizeof (WordEntryPosVector * )* q -> size );
519
-
520
- /* A dummy WordEntryPos array to use when haspos is false */
521
- posnull .npos = 1 ;
522
- posnull .pos [0 ]= 0 ;
523
- WEP_SETPOS (posnull .pos [0 ],MAXENTRYPOS - 1 );
524
- POSNULL = (WordEntryPosVector * )& posnull ;
525
-
526
- for (i = 0 ;i < size ;i ++ )
527
- {
528
- firstentry = entry = find_wordentry (t ,q ,item [i ],& nitem );
529
- if (!entry )
530
- continue ;
531
-
532
- while (entry - firstentry < nitem )
533
- {
534
- if (entry -> haspos )
535
- pos [i ]= _POSVECPTR (t ,entry );
536
- else
537
- pos [i ]= POSNULL ;
538
-
539
- dimt = pos [i ]-> npos ;
540
- post = pos [i ]-> pos ;
541
- for (k = 0 ;k < i ;k ++ )
542
- {
543
- if (!pos [k ])
544
- continue ;
545
- lenct = pos [k ]-> npos ;
546
- ct = pos [k ]-> pos ;
547
- for (l = 0 ;l < dimt ;l ++ )
548
- {
549
- for (p = 0 ;p < lenct ;p ++ )
550
- {
551
- dist = Abs ((int )WEP_GETPOS (post [l ])- (int )WEP_GETPOS (ct [p ]));
552
- if (dist || (dist == 0 && (pos [i ]== POSNULL || pos [k ]== POSNULL )))
553
- {
554
- float curw ;
555
-
556
- if (!dist )
557
- dist = MAXENTRYPOS ;
558
- curw = sqrt (wpos (post [l ])* wpos (ct [p ])* word_distance (dist ));
559
- res = (res < 0 ) ?curw :1.0 - (1.0 - res )* (1.0 - curw );
560
- }
561
- }
562
- }
563
- }
564
-
565
- entry ++ ;
566
- }
567
- }
568
- pfree (pos );
569
- pfree (item );
570
- return res ;
571
- }
572
-
573
- static float
574
- calc_rank_or (const float * w ,TSVector t ,TSQuery q )
575
- {
576
- WordEntry * entry ,
577
- * firstentry ;
578
- WordEntryPosVector1 posnull ;
579
- WordEntryPos * post ;
580
- int32 dimt ,
581
- j ,
582
- i ,
583
- nitem ;
584
- float res = 0.0 ;
585
- QueryOperand * * item ;
586
- int size = q -> size ;
587
-
588
- /* A dummy WordEntryPos array to use when haspos is false */
589
- posnull .npos = 1 ;
590
- posnull .pos [0 ]= 0 ;
591
-
592
- item = SortAndUniqItems (q ,& size );
593
-
594
- for (i = 0 ;i < size ;i ++ )
595
- {
596
- float resj ,
597
- wjm ;
598
- int32 jm ;
599
-
600
- firstentry = entry = find_wordentry (t ,q ,item [i ],& nitem );
601
- if (!entry )
602
- continue ;
603
-
604
- while (entry - firstentry < nitem )
605
- {
606
- if (entry -> haspos )
607
- {
608
- dimt = POSDATALEN (t ,entry );
609
- post = POSDATAPTR (t ,entry );
610
- }
611
- else
612
- {
613
- dimt = posnull .npos ;
614
- post = posnull .pos ;
615
- }
616
-
617
- resj = 0.0 ;
618
- wjm = -1.0 ;
619
- jm = 0 ;
620
- for (j = 0 ;j < dimt ;j ++ )
621
- {
622
- resj = resj + wpos (post [j ]) / ((j + 1 )* (j + 1 ));
623
- if (wpos (post [j ])> wjm )
624
- {
625
- wjm = wpos (post [j ]);
626
- jm = j ;
627
- }
628
- }
629
- /*
630
- limit (sum(i/i^2),i->inf) = pi^2/6
631
- resj = sum(wi/i^2),i=1,noccurence,
632
- wi - should be sorted desc,
633
- don't sort for now, just choose maximum weight. This should be corrected
634
- Oleg Bartunov
635
- */
636
- res = res + (wjm + resj - wjm / ((jm + 1 )* (jm + 1 ))) /1.64493406685 ;
637
-
638
- entry ++ ;
639
- }
640
- }
641
- if (size > 0 )
642
- res = res /size ;
643
- pfree (item );
644
- return res ;
645
- }
646
-
647
- static float
648
- calc_rank (const float * w ,TSVector t ,TSQuery q ,int32 method )
649
- {
650
- QueryItem * item = GETQUERY (q );
651
- float res = 0.0 ;
652
- int len ;
653
-
654
- if (!t -> size || !q -> size )
655
- return 0.0 ;
656
-
657
- /* XXX: What about NOT? */
658
- res = (item -> type == QI_OPR && (item -> qoperator .oper == OP_AND ||
659
- item -> qoperator .oper == OP_PHRASE )) ?
660
- calc_rank_and (w ,t ,q ) :
661
- calc_rank_or (w ,t ,q );
662
-
663
- if (res < 0 )
664
- res = 1e-20f ;
665
-
666
- if ((method & RANK_NORM_LOGLENGTH )&& t -> size > 0 )
667
- res /=log ((double ) (cnt_length (t )+ 1 )) /log (2.0 );
668
-
669
- if (method & RANK_NORM_LENGTH )
670
- {
671
- len = cnt_length (t );
672
- if (len > 0 )
673
- res /= (float )len ;
674
- }
675
-
676
- /* RANK_NORM_EXTDIST not applicable */
677
-
678
- if ((method & RANK_NORM_UNIQ )&& t -> size > 0 )
679
- res /= (float ) (t -> size );
680
-
681
- if ((method & RANK_NORM_LOGUNIQ )&& t -> size > 0 )
682
- res /=log ((double ) (t -> size + 1 )) /log (2.0 );
683
-
684
- if (method & RANK_NORM_RDIVRPLUS1 )
685
- res /= (res + 1 );
686
-
687
- return res ;
688
- }
689
-
690
407
Datum
691
408
rum_extract_tsvector (PG_FUNCTION_ARGS )
692
409
{
@@ -740,44 +457,6 @@ rum_extract_tsvector(PG_FUNCTION_ARGS)
740
457
PG_RETURN_POINTER (entries );
741
458
}
742
459
743
- static const float *
744
- getWeights (ArrayType * win )
745
- {
746
- static float ws [lengthof (weights )];
747
- int i ;
748
- float4 * arrdata ;
749
-
750
- if (win == NULL )
751
- return weights ;
752
-
753
- if (ARR_NDIM (win )!= 1 )
754
- ereport (ERROR ,
755
- (errcode (ERRCODE_ARRAY_SUBSCRIPT_ERROR ),
756
- errmsg ("array of weight must be one-dimensional" )));
757
-
758
- if (ArrayGetNItems (ARR_NDIM (win ),ARR_DIMS (win ))< lengthof (weights ))
759
- ereport (ERROR ,
760
- (errcode (ERRCODE_ARRAY_SUBSCRIPT_ERROR ),
761
- errmsg ("array of weight is too short" )));
762
-
763
- if (array_contains_nulls (win ))
764
- ereport (ERROR ,
765
- (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
766
- errmsg ("array of weight must not contain nulls" )));
767
-
768
- arrdata = (float4 * )ARR_DATA_PTR (win );
769
- for (i = 0 ;i < lengthof (weights );i ++ )
770
- {
771
- ws [i ]= (arrdata [i ] >=0 ) ?arrdata [i ] :weights [i ];
772
- if (ws [i ]> 1.0 )
773
- ereport (ERROR ,
774
- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
775
- errmsg ("weight out of range" )));
776
- }
777
-
778
- return ws ;
779
- }
780
-
781
460
Datum
782
461
rum_extract_tsquery (PG_FUNCTION_ARGS )
783
462
{
@@ -893,15 +572,10 @@ rum_tsquery_distance(PG_FUNCTION_ARGS)
893
572
Datum
894
573
rum_ts_distance (PG_FUNCTION_ARGS )
895
574
{
896
- TSVector txt = PG_GETARG_TSVECTOR (0 );
897
- TSQuery query = PG_GETARG_TSQUERY (1 );
898
- float res ;
899
-
900
- res = 1.0 /calc_rank (getWeights (NULL ),txt ,query ,DEF_NORM_METHOD );
901
-
902
- PG_FREE_IF_COPY (txt ,0 );
903
- PG_FREE_IF_COPY (query ,1 );
904
- PG_RETURN_FLOAT4 (res );
575
+ return DirectFunctionCall2Coll (ts_rank_tt ,
576
+ PG_GET_COLLATION (),
577
+ PG_GETARG_DATUM (0 ),
578
+ PG_GETARG_DATUM (1 ));
905
579
}
906
580
907
581
Datum