Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a6b6010

Browse files
author
Artur Zakirov
committed
Added opclass functions rum_extract_tsvector, rum_extract_tsquery
1 parent 38f455b commit a6b6010

File tree

3 files changed

+278
-17
lines changed

3 files changed

+278
-17
lines changed

‎rum--1.0.sql

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,27 @@ LANGUAGE C;
77
CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler;
88

99
-- Opclasses
10-
CREATEFUNCTIONgin_tsvector_config(internal)
10+
CREATEFUNCTIONrum_extract_tsvector(tsvector,internal,internal,internal,internal)
11+
RETURNS internal
12+
AS'MODULE_PATHNAME'
13+
LANGUAGE C IMMUTABLE STRICT;
14+
15+
CREATEFUNCTIONrum_extract_tsquery(tsvector,internal,smallint,internal,internal,internal,internal)
16+
RETURNS internal
17+
AS'MODULE_PATHNAME'
18+
LANGUAGE C IMMUTABLE STRICT;
19+
20+
CREATEFUNCTIONrum_tsvector_config(internal)
1121
RETURNS void
1222
AS'MODULE_PATHNAME'
1323
LANGUAGE C IMMUTABLE STRICT;
1424

15-
CREATEFUNCTIONgin_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal)
25+
CREATEFUNCTIONrum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal)
1626
RETURNS bool
1727
AS'MODULE_PATHNAME'
1828
LANGUAGE C IMMUTABLE STRICT;
1929

20-
CREATEFUNCTIONgin_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal)
30+
CREATEFUNCTIONrum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal)
2131
RETURNS float8
2232
AS'MODULE_PATHNAME'
2333
LANGUAGE C IMMUTABLE STRICT;
@@ -28,12 +38,12 @@ AS
2838
OPERATOR1 @@ (tsvector, tsquery),
2939
OPERATOR2 @@@ (tsvector, tsquery),
3040
FUNCTION1 bttextcmp(text,text),
31-
FUNCTION2gin_extract_tsvector(tsvector,internal,internal),
32-
FUNCTION3gin_extract_tsquery(tsvector,internal,smallint,internal,internal,internal,internal),
41+
FUNCTION2rum_extract_tsvector(tsvector,internal,internal,internal,internal),
42+
FUNCTION3rum_extract_tsquery(tsvector,internal,smallint,internal,internal,internal,internal),
3343
FUNCTION4 gin_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
3444
FUNCTION5 gin_cmp_prefix(text,text,smallint,internal),
3545
FUNCTION6 gin_tsquery_triconsistent(internal,smallint,tsvector,int,internal,internal,internal),
36-
FUNCTION7gin_tsvector_config(internal),
37-
FUNCTION8gin_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
38-
FUNCTION9gin_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal),
46+
FUNCTION7rum_tsvector_config(internal),
47+
FUNCTION8rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
48+
FUNCTION9rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal),
3949
STORAGEtext;

‎rum.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -862,9 +862,11 @@ extern void ginInsertCleanup(GinState *ginstate,
862862
#defineGIN_PRE_CONSISTENT_PROC 8
863863
#defineGIN_ORDERING_PROC 9
864864

865-
externDatumgin_tsvector_config(PG_FUNCTION_ARGS);
866-
externDatumgin_tsquery_pre_consistent(PG_FUNCTION_ARGS);
867-
externDatumgin_tsquery_distance(PG_FUNCTION_ARGS);
865+
externDatumrum_extract_tsvector(PG_FUNCTION_ARGS);
866+
externDatumrum_extract_tsquery(PG_FUNCTION_ARGS);
867+
externDatumrum_tsvector_config(PG_FUNCTION_ARGS);
868+
externDatumrum_tsquery_pre_consistent(PG_FUNCTION_ARGS);
869+
externDatumrum_tsquery_distance(PG_FUNCTION_ARGS);
868870

869871
/*
870872
* Functions for reading ItemPointers with additional information. Used in

‎rum_ts_utils.c

Lines changed: 255 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,17 @@
1313
#include"catalog/pg_type.h"
1414
#include"tsearch/ts_type.h"
1515
#include"tsearch/ts_utils.h"
16+
#include"utils/builtins.h"
1617

1718
#include"rum.h"
1819

1920
#include<math.h>
2021

21-
PG_FUNCTION_INFO_V1(gin_tsvector_config);
22-
PG_FUNCTION_INFO_V1(gin_tsquery_pre_consistent);
23-
PG_FUNCTION_INFO_V1(gin_tsquery_distance);
22+
PG_FUNCTION_INFO_V1(rum_extract_tsvector);
23+
PG_FUNCTION_INFO_V1(rum_extract_tsquery);
24+
PG_FUNCTION_INFO_V1(rum_tsvector_config);
25+
PG_FUNCTION_INFO_V1(rum_tsquery_pre_consistent);
26+
PG_FUNCTION_INFO_V1(rum_tsquery_distance);
2427

2528
staticfloatcalc_rank_and(float*w,Datum*addInfo,bool*addInfoIsNull,
2629
intsize);
@@ -53,7 +56,7 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
5356
}
5457

5558
Datum
56-
gin_tsquery_pre_consistent(PG_FUNCTION_ARGS)
59+
rum_tsquery_pre_consistent(PG_FUNCTION_ARGS)
5760
{
5861
bool*check= (bool*)PG_GETARG_POINTER(0);
5962

@@ -95,6 +98,7 @@ static WordEntryPosVector POSNULL = {
9598
{0}
9699
};
97100

101+
#defineSIXTHBIT 0x20
98102
#defineLOWERMASK 0x1F
99103

100104
/*
@@ -109,6 +113,38 @@ word_distance(int32 w)
109113
return1.0 / (1.005+0.05*exp(((float4)w) /1.5-2));
110114
}
111115

116+
staticint
117+
compress_pos(char*target,uint16*pos,intnpos)
118+
{
119+
inti;
120+
uint16prev=0,delta;
121+
char*ptr;
122+
123+
ptr=target;
124+
for (i=0;i<npos;i++)
125+
{
126+
delta=WEP_GETPOS(pos[i])-WEP_GETPOS(prev);
127+
128+
while (true)
129+
{
130+
if (delta >=SIXTHBIT)
131+
{
132+
*ptr= (delta& (~HIGHBIT)) |HIGHBIT;
133+
ptr++;
134+
delta >>=7;
135+
}
136+
else
137+
{
138+
*ptr=delta | (WEP_GETWEIGHT(pos[i]) <<5);
139+
ptr++;
140+
break;
141+
}
142+
}
143+
prev=pos[i];
144+
}
145+
returnptr-target;
146+
}
147+
112148
staticchar*
113149
decompress_pos(char*ptr,uint16*pos)
114150
{
@@ -293,7 +329,220 @@ calc_rank(float *w, TSQuery q, Datum *addInfo, bool *addInfoIsNull, int size)
293329
}
294330

295331
Datum
296-
gin_tsquery_distance(PG_FUNCTION_ARGS)
332+
rum_extract_tsvector(PG_FUNCTION_ARGS)
333+
{
334+
TSVectorvector=PG_GETARG_TSVECTOR(0);
335+
int32*nentries= (int32*)PG_GETARG_POINTER(1);
336+
Datum**addInfo= (Datum**)PG_GETARG_POINTER(3);
337+
bool**addInfoIsNull= (bool**)PG_GETARG_POINTER(4);
338+
Datum*entries=NULL;
339+
340+
*nentries=vector->size;
341+
if (vector->size>0)
342+
{
343+
inti;
344+
WordEntry*we=ARRPTR(vector);
345+
WordEntryPosVector*posVec;
346+
347+
entries= (Datum*)palloc(sizeof(Datum)*vector->size);
348+
*addInfo= (Datum*)palloc(sizeof(Datum)*vector->size);
349+
*addInfoIsNull= (bool*)palloc(sizeof(bool)*vector->size);
350+
351+
for (i=0;i<vector->size;i++)
352+
{
353+
text*txt;
354+
bytea*posData;
355+
intposDataSize;
356+
357+
txt=cstring_to_text_with_len(STRPTR(vector)+we->pos,we->len);
358+
entries[i]=PointerGetDatum(txt);
359+
360+
if (we->haspos)
361+
{
362+
posVec=_POSVECPTR(vector,we);
363+
posDataSize=VARHDRSZ+2*posVec->npos*sizeof(WordEntryPos);
364+
posData= (bytea*)palloc(posDataSize);
365+
posDataSize=compress_pos(posData->vl_dat,posVec->pos,posVec->npos)+VARHDRSZ;
366+
SET_VARSIZE(posData,posDataSize);
367+
368+
(*addInfo)[i]=PointerGetDatum(posData);
369+
(*addInfoIsNull)[i]= false;
370+
}
371+
else
372+
{
373+
(*addInfo)[i]= (Datum)0;
374+
(*addInfoIsNull)[i]= true;
375+
}
376+
we++;
377+
}
378+
}
379+
380+
PG_FREE_IF_COPY(vector,0);
381+
PG_RETURN_POINTER(entries);
382+
}
383+
384+
/*
385+
* sort QueryOperands by (length, word)
386+
*/
387+
staticint
388+
compareQueryOperand(constvoid*a,constvoid*b,void*arg)
389+
{
390+
char*operand= (char*)arg;
391+
QueryOperand*qa= (*(QueryOperand*const*)a);
392+
QueryOperand*qb= (*(QueryOperand*const*)b);
393+
394+
returntsCompareString(operand+qa->distance,qa->length,
395+
operand+qb->distance,qb->length,
396+
false);
397+
}
398+
399+
/*
400+
* Returns a sorted, de-duplicated array of QueryOperands in a query.
401+
* The returned QueryOperands are pointers to the original QueryOperands
402+
* in the query.
403+
*
404+
* Length of the returned array is stored in *size
405+
*/
406+
staticQueryOperand**
407+
SortAndUniqItems(TSQueryq,int*size)
408+
{
409+
char*operand=GETOPERAND(q);
410+
QueryItem*item=GETQUERY(q);
411+
QueryOperand**res,
412+
**ptr,
413+
**prevptr;
414+
415+
ptr=res= (QueryOperand**)palloc(sizeof(QueryOperand*)**size);
416+
417+
/* Collect all operands from the tree to res */
418+
while ((*size)--)
419+
{
420+
if (item->type==QI_VAL)
421+
{
422+
*ptr= (QueryOperand*)item;
423+
ptr++;
424+
}
425+
item++;
426+
}
427+
428+
*size=ptr-res;
429+
if (*size<2)
430+
returnres;
431+
432+
qsort_arg(res,*size,sizeof(QueryOperand*),compareQueryOperand, (void*)operand);
433+
434+
ptr=res+1;
435+
prevptr=res;
436+
437+
/* remove duplicates */
438+
while (ptr-res<*size)
439+
{
440+
if (compareQueryOperand((void*)ptr, (void*)prevptr, (void*)operand)!=0)
441+
{
442+
prevptr++;
443+
*prevptr=*ptr;
444+
}
445+
ptr++;
446+
}
447+
448+
*size=prevptr+1-res;
449+
returnres;
450+
}
451+
452+
Datum
453+
rum_extract_tsquery(PG_FUNCTION_ARGS)
454+
{
455+
TSQueryquery=PG_GETARG_TSQUERY(0);
456+
int32*nentries= (int32*)PG_GETARG_POINTER(1);
457+
458+
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
459+
bool**ptr_partialmatch= (bool**)PG_GETARG_POINTER(3);
460+
Pointer**extra_data= (Pointer**)PG_GETARG_POINTER(4);
461+
462+
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
463+
int32*searchMode= (int32*)PG_GETARG_POINTER(6);
464+
Datum*entries=NULL;
465+
466+
*nentries=0;
467+
468+
if (query->size>0)
469+
{
470+
QueryItem*item=GETQUERY(query);
471+
int32i,
472+
j;
473+
bool*partialmatch;
474+
int*map_item_operand;
475+
char*operand=GETOPERAND(query);
476+
QueryOperand**operands;
477+
478+
/*
479+
* If the query doesn't have any required positive matches (for
480+
* instance, it's something like '! foo'), we have to do a full index
481+
* scan.
482+
*/
483+
if (tsquery_requires_match(item))
484+
*searchMode=GIN_SEARCH_MODE_DEFAULT;
485+
else
486+
*searchMode=GIN_SEARCH_MODE_ALL;
487+
488+
*nentries=query->size;
489+
operands=SortAndUniqItems(query,nentries);
490+
491+
entries= (Datum*)palloc(sizeof(Datum)* (*nentries));
492+
partialmatch=*ptr_partialmatch= (bool*)palloc(sizeof(bool)* (*nentries));
493+
494+
/*
495+
* Make map to convert item's number to corresponding operand's (the
496+
* same, entry's) number. Entry's number is used in check array in
497+
* consistent method. We use the same map for each entry.
498+
*/
499+
*extra_data= (Pointer*)palloc(sizeof(Pointer)* (*nentries));
500+
map_item_operand= (int*)palloc0(sizeof(int)*query->size);
501+
502+
for (i=0;i< (*nentries);i++)
503+
{
504+
text*txt;
505+
506+
txt=cstring_to_text_with_len(GETOPERAND(query)+operands[i]->distance,
507+
operands[i]->length);
508+
entries[i]=PointerGetDatum(txt);
509+
partialmatch[i]=operands[i]->prefix;
510+
(*extra_data)[i]= (Pointer)map_item_operand;
511+
}
512+
513+
/* Now rescan the VAL items and fill in the arrays */
514+
for (j=0;j<query->size;j++)
515+
{
516+
if (item[j].type==QI_VAL)
517+
{
518+
QueryOperand*val=&item[j].qoperand;
519+
boolfound= false;
520+
521+
for (i=0;i< (*nentries);i++)
522+
{
523+
if (!tsCompareString(operand+operands[i]->distance,operands[i]->length,
524+
operand+val->distance,val->length,
525+
false))
526+
{
527+
map_item_operand[j]=i;
528+
found= true;
529+
break;
530+
}
531+
}
532+
533+
if (!found)
534+
elog(ERROR,"Operand not found!");
535+
}
536+
}
537+
}
538+
539+
PG_FREE_IF_COPY(query,0);
540+
541+
PG_RETURN_POINTER(entries);
542+
}
543+
544+
Datum
545+
rum_tsquery_distance(PG_FUNCTION_ARGS)
297546
{
298547
/* bool *check = (bool *) PG_GETARG_POINTER(0); */
299548

@@ -312,7 +561,7 @@ gin_tsquery_distance(PG_FUNCTION_ARGS)
312561
}
313562

314563
Datum
315-
gin_tsvector_config(PG_FUNCTION_ARGS)
564+
rum_tsvector_config(PG_FUNCTION_ARGS)
316565
{
317566
GinConfig*config= (GinConfig*)PG_GETARG_POINTER(0);
318567
config->addInfoTypeOid=BYTEAOID;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp