Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 38f455b

Browse files
author
Artur Zakirov
committed
Added opclass functions
1 parent 4df8c7b, commit 38f455b

File tree

5 files changed

+350
-6
lines changed

5 files changed

+350
-6
lines changed

‎Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# contrib/rum/Makefile
22

33
MODULE_big = rum
4-
OBJS = ginsort.o\
4+
OBJS = ginsort.orum_ts_utils.o\
55
ginarrayproc.o ginbtree.o ginbulk.o gindatapage.o\
66
ginentrypage.o ginfast.o ginget.o gininsert.o\
77
ginscan.o ginutil.o ginvacuum.o$(WIN32RES)

‎ginutil.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ rumhandler(PG_FUNCTION_ARGS)
3838
IndexAmRoutine*amroutine=makeNode(IndexAmRoutine);
3939

4040
amroutine->amstrategies=0;
41-
amroutine->amsupport=6;
41+
amroutine->amsupport=9;
4242
amroutine->amcanorder= false;
4343
amroutine->amcanorderbyop= false;
4444
amroutine->amcanbackward= false;

‎rum--1.0.sql

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,21 @@ LANGUAGE C;
77
CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler;
88

99
-- Opclasses
10+
-- Configuration support function; bound as FUNCTION 7 of rum_tsvector_ops.
CREATE FUNCTION gin_tsvector_config(internal)
	RETURNS void
	AS 'MODULE_PATHNAME'
	LANGUAGE C IMMUTABLE STRICT;
14+
15+
-- Pre-consistent support function; bound as FUNCTION 8 of rum_tsvector_ops.
CREATE FUNCTION gin_tsquery_pre_consistent(
		internal, smallint, tsvector, int,
		internal, internal, internal, internal)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE C IMMUTABLE STRICT;
19+
20+
-- Ordering (distance) support function; bound as FUNCTION 9 of
-- rum_tsvector_ops.
CREATE FUNCTION gin_tsquery_distance(
		internal, smallint, tsvector, int,
		internal, internal, internal, internal, internal)
	RETURNS float8
	AS 'MODULE_PATHNAME'
	LANGUAGE C IMMUTABLE STRICT;
24+
1025
CREATEOPERATOR CLASSrum_tsvector_ops
1126
FOR TYPE tsvector USING rum
1227
AS
@@ -17,4 +32,8 @@ AS
1732
FUNCTION3 gin_extract_tsquery(tsvector,internal,smallint,internal,internal,internal,internal),
1833
FUNCTION4 gin_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
1934
FUNCTION5 gin_cmp_prefix(text,text,smallint,internal),
35+
FUNCTION6 gin_tsquery_triconsistent(internal,smallint,tsvector,int,internal,internal,internal),
36+
FUNCTION7 gin_tsvector_config(internal),
37+
FUNCTION8 gin_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
38+
FUNCTION9 gin_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal),
2039
STORAGEtext;

‎rum.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -362,10 +362,6 @@ typedef struct GinState
362362
OidsupportCollation[INDEX_MAX_KEYS];
363363
}GinState;
364364

365-
#defineGIN_CONFIG_PROC 7
366-
#defineGIN_PRE_CONSISTENT_PROC 8
367-
#defineGIN_ORDERING_PROC 9
368-
369365
typedefstructGinConfig
370366
{
371367
OidaddInfoTypeOid;
@@ -861,6 +857,15 @@ extern void ginHeapTupleFastCollect(GinState *ginstate,
861857
externvoidginInsertCleanup(GinState*ginstate,
862858
boolvac_delay,IndexBulkDeleteResult*stats);
863859

860+
/* rum_ts_utils.c */
861+
#defineGIN_CONFIG_PROC 7
862+
#defineGIN_PRE_CONSISTENT_PROC 8
863+
#defineGIN_ORDERING_PROC 9
864+
865+
externDatumgin_tsvector_config(PG_FUNCTION_ARGS);
866+
externDatumgin_tsquery_pre_consistent(PG_FUNCTION_ARGS);
867+
externDatumgin_tsquery_distance(PG_FUNCTION_ARGS);
868+
864869
/*
865870
* Functions for reading ItemPointers with additional information. Used in
866871
* various .c files and have to be inline for being fast.

‎rum_ts_utils.c

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* rum_ts_utils.c
4+
*various support functions
5+
*
6+
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7+
*
8+
*-------------------------------------------------------------------------
9+
*/
10+
11+
#include"postgres.h"
12+
13+
#include"catalog/pg_type.h"
14+
#include"tsearch/ts_type.h"
15+
#include"tsearch/ts_utils.h"
16+
17+
#include"rum.h"
18+
19+
#include<math.h>
20+
21+
PG_FUNCTION_INFO_V1(gin_tsvector_config);
22+
PG_FUNCTION_INFO_V1(gin_tsquery_pre_consistent);
23+
PG_FUNCTION_INFO_V1(gin_tsquery_distance);
24+
25+
staticfloatcalc_rank_and(float*w,Datum*addInfo,bool*addInfoIsNull,
26+
intsize);
27+
staticfloatcalc_rank_or(float*w,Datum*addInfo,bool*addInfoIsNull,
28+
intsize);
29+
30+
typedefstruct
31+
{
32+
QueryItem*first_item;
33+
bool*check;
34+
int*map_item_operand;
35+
bool*need_recheck;
36+
}GinChkVal;
37+
38+
staticbool
39+
checkcondition_gin(void*checkval,QueryOperand*val,ExecPhraseData*data)
40+
{
41+
GinChkVal*gcv= (GinChkVal*)checkval;
42+
intj;
43+
44+
/* if any val requiring a weight is used, set recheck flag */
45+
if (val->weight!=0)
46+
*(gcv->need_recheck)= true;
47+
48+
/* convert item's number to corresponding entry's (operand's) number */
49+
j=gcv->map_item_operand[((QueryItem*)val)-gcv->first_item];
50+
51+
/* return presence of current entry in indexed value */
52+
returngcv->check[j];
53+
}
54+
55+
Datum
56+
gin_tsquery_pre_consistent(PG_FUNCTION_ARGS)
57+
{
58+
bool*check= (bool*)PG_GETARG_POINTER(0);
59+
60+
TSQueryquery=PG_GETARG_TSQUERY(2);
61+
62+
Pointer*extra_data= (Pointer*)PG_GETARG_POINTER(4);
63+
boolrecheck;
64+
boolres= FALSE;
65+
66+
if (query->size>0)
67+
{
68+
QueryItem*item;
69+
GinChkValgcv;
70+
71+
/*
72+
* check-parameter array has one entry for each value (operand) in the
73+
* query.
74+
*/
75+
gcv.first_item=item=GETQUERY(query);
76+
gcv.check=check;
77+
gcv.map_item_operand= (int*) (extra_data[0]);
78+
gcv.need_recheck=&recheck;
79+
80+
res=TS_execute(GETQUERY(query),
81+
&gcv,
82+
false,
83+
checkcondition_gin);
84+
}
85+
86+
PG_RETURN_BOOL(res);
87+
}
88+
89+
staticfloatweights[]= {0.1f,0.2f,0.4f,1.0f};
90+
91+
#definewpos(wep)( w[ WEP_GETWEIGHT(wep) ] )
92+
/* A dummy WordEntryPos array to use when haspos is false */
93+
staticWordEntryPosVectorPOSNULL= {
94+
1,/* Number of elements that follow */
95+
{0}
96+
};
97+
98+
#defineLOWERMASK 0x1F
99+
100+
/*
101+
* Returns a weight of a word collocation
102+
*/
103+
staticfloat4
104+
word_distance(int32w)
105+
{
106+
if (w>100)
107+
return1e-30f;
108+
109+
return1.0 / (1.005+0.05*exp(((float4)w) /1.5-2));
110+
}
111+
112+
staticchar*
113+
decompress_pos(char*ptr,uint16*pos)
114+
{
115+
inti;
116+
uint8v;
117+
uint16delta=0;
118+
119+
i=0;
120+
while (true)
121+
{
122+
v=*ptr;
123+
ptr++;
124+
if (v&HIGHBIT)
125+
{
126+
delta |= (v& (~HIGHBIT)) <<i;
127+
}
128+
else
129+
{
130+
delta |= (v&LOWERMASK) <<i;
131+
*pos+=delta;
132+
WEP_SETWEIGHT(*pos,v >>5);
133+
returnptr;
134+
}
135+
i+=7;
136+
}
137+
}
138+
139+
staticint
140+
count_pos(char*ptr,intlen)
141+
{
142+
intcount=0,i;
143+
for (i=0;i<len;i++)
144+
{
145+
if (!(ptr[i]&HIGHBIT))
146+
count++;
147+
}
148+
returncount;
149+
}
150+
151+
staticfloat
152+
calc_rank_and(float*w,Datum*addInfo,bool*addInfoIsNull,intsize)
153+
{
154+
inti,
155+
k,
156+
l,
157+
p;
158+
WordEntryPospost,
159+
ct;
160+
int32dimt,
161+
lenct,
162+
dist;
163+
floatres=-1.0;
164+
char*ptrt,*ptrc;
165+
166+
if (size<2)
167+
{
168+
returncalc_rank_or(w,addInfo,addInfoIsNull,size);
169+
}
170+
WEP_SETPOS(POSNULL.pos[0],MAXENTRYPOS-1);
171+
172+
for (i=0;i<size;i++)
173+
{
174+
if (!addInfoIsNull[i])
175+
{
176+
dimt=count_pos(VARDATA_ANY(addInfo[i]),VARSIZE_ANY_EXHDR(addInfo[i]));
177+
ptrt= (char*)VARDATA_ANY(addInfo[i]);
178+
}
179+
else
180+
{
181+
dimt=POSNULL.npos;
182+
ptrt= (char*)POSNULL.pos;
183+
}
184+
for (k=0;k<i;k++)
185+
{
186+
if (!addInfoIsNull[k])
187+
lenct=count_pos(VARDATA_ANY(addInfo[k]),VARSIZE_ANY_EXHDR(addInfo[k]));
188+
else
189+
lenct=POSNULL.npos;
190+
post=0;
191+
for (l=0;l<dimt;l++)
192+
{
193+
ptrt=decompress_pos(ptrt,&post);
194+
ct=0;
195+
if (!addInfoIsNull[k])
196+
ptrc= (char*)VARDATA_ANY(addInfo[k]);
197+
else
198+
ptrc= (char*)POSNULL.pos;
199+
for (p=0;p<lenct;p++)
200+
{
201+
ptrc=decompress_pos(ptrc,&ct);
202+
dist=Abs((int)WEP_GETPOS(post)- (int)WEP_GETPOS(ct));
203+
if (dist|| (dist==0&& (ptrt== (char*)POSNULL.pos||ptrc== (char*)POSNULL.pos)))
204+
{
205+
floatcurw;
206+
207+
if (!dist)
208+
dist=MAXENTRYPOS;
209+
curw=sqrt(wpos(post)*wpos(ct)*word_distance(dist));
210+
res= (res<0) ?curw :1.0- (1.0-res)* (1.0-curw);
211+
}
212+
}
213+
}
214+
}
215+
216+
}
217+
returnres;
218+
}
219+
220+
staticfloat
221+
calc_rank_or(float*w,Datum*addInfo,bool*addInfoIsNull,intsize)
222+
{
223+
WordEntryPospost;
224+
int32dimt,
225+
j,
226+
i;
227+
floatres=0.0;
228+
char*ptrt;
229+
230+
for (i=0;i<size;i++)
231+
{
232+
floatresj,
233+
wjm;
234+
int32jm;
235+
236+
if (!addInfoIsNull[i])
237+
{
238+
dimt=count_pos(VARDATA_ANY(addInfo[i]),VARSIZE_ANY_EXHDR(addInfo[i]));
239+
ptrt= (char*)VARDATA_ANY(addInfo[i]);
240+
}
241+
else
242+
{
243+
dimt=POSNULL.npos;
244+
ptrt= (char*)POSNULL.pos;
245+
}
246+
247+
resj=0.0;
248+
wjm=-1.0;
249+
jm=0;
250+
post=0;
251+
for (j=0;j<dimt;j++)
252+
{
253+
ptrt=decompress_pos(ptrt,&post);
254+
resj=resj+wpos(post) / ((j+1)* (j+1));
255+
if (wpos(post)>wjm)
256+
{
257+
wjm=wpos(post);
258+
jm=j;
259+
}
260+
}
261+
/*
262+
limit (sum(i/i^2),i->inf) = pi^2/6
263+
resj = sum(wi/i^2),i=1,noccurence,
264+
wi - should be sorted desc,
265+
don't sort for now, just choose maximum weight. This should be corrected
266+
Oleg Bartunov
267+
*/
268+
res=res+ (wjm+resj-wjm / ((jm+1)* (jm+1))) /1.64493406685;
269+
270+
}
271+
if (size>0)
272+
res=res /size;
273+
returnres;
274+
}
275+
276+
staticfloat
277+
calc_rank(float*w,TSQueryq,Datum*addInfo,bool*addInfoIsNull,intsize)
278+
{
279+
QueryItem*item=GETQUERY(q);
280+
floatres=0.0;
281+
282+
if (!size|| !q->size)
283+
return0.0;
284+
285+
/* XXX: What about NOT? */
286+
res= (item->type==QI_OPR&&item->qoperator.oper==OP_AND) ?
287+
calc_rank_and(w,addInfo,addInfoIsNull,size) :calc_rank_or(w,addInfo,addInfoIsNull,size);
288+
289+
if (res<0)
290+
res=1e-20f;
291+
292+
returnres;
293+
}
294+
295+
Datum
296+
gin_tsquery_distance(PG_FUNCTION_ARGS)
297+
{
298+
/* bool *check = (bool *) PG_GETARG_POINTER(0); */
299+
300+
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
301+
TSQueryquery=PG_GETARG_TSQUERY(2);
302+
303+
int32nkeys=PG_GETARG_INT32(3);
304+
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
305+
Datum*addInfo= (Datum*)PG_GETARG_POINTER(8);
306+
bool*addInfoIsNull= (bool*)PG_GETARG_POINTER(9);
307+
float8res;
308+
309+
res=1.0 / (float8)calc_rank(weights,query,addInfo,addInfoIsNull,nkeys);
310+
311+
PG_RETURN_FLOAT8(res);
312+
}
313+
314+
Datum
315+
gin_tsvector_config(PG_FUNCTION_ARGS)
316+
{
317+
GinConfig*config= (GinConfig*)PG_GETARG_POINTER(0);
318+
config->addInfoTypeOid=BYTEAOID;
319+
PG_RETURN_VOID();
320+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp