Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a6ea645

Browse files
committed
Stat function now can show statistics per weight of lexemes
1 parent 1b9ef00 commit a6ea645

File tree

6 files changed

+160
-37
lines changed

6 files changed

+160
-37
lines changed

‎contrib/tsearch2/expected/tsearch2.out

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ select rank(' a:1 s:2 d g'::tsvector, 'a & s');
782782
(1 row)
783783

784784
insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
785+
drop trigger tsvectorupdate on test_tsvector;
785786
select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
786787
word | ndoc | nentry
787788
-----------+------+--------
@@ -1933,6 +1934,55 @@ select * from stat('select a from test_tsvector') order by ndoc desc, nentry des
19331934
qwerti | 1 | 1
19341935
(1146 rows)
19351936

1937+
insert into test_tsvector values ('1', 'a:1a,2,3b b:5a,6a,7c,8');
1938+
insert into test_tsvector values ('1', 'a:1a,2,3c b:5a,6b,7c,8b');
1939+
select * from stat('select a from test_tsvector','a') order by ndoc desc, nentry desc, word;
1940+
word | ndoc | nentry
1941+
------+------+--------
1942+
b | 2 | 3
1943+
a | 2 | 2
1944+
(2 rows)
1945+
1946+
select * from stat('select a from test_tsvector','b') order by ndoc desc, nentry desc, word;
1947+
word | ndoc | nentry
1948+
------+------+--------
1949+
b | 1 | 2
1950+
a | 1 | 1
1951+
(2 rows)
1952+
1953+
select * from stat('select a from test_tsvector','c') order by ndoc desc, nentry desc, word;
1954+
word | ndoc | nentry
1955+
------+------+--------
1956+
b | 2 | 2
1957+
a | 1 | 1
1958+
(2 rows)
1959+
1960+
select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry desc, word;
1961+
word | ndoc | nentry
1962+
-----------+------+--------
1963+
a | 2 | 2
1964+
copyright | 2 | 2
1965+
foo | 1 | 3
1966+
bar | 1 | 2
1967+
345 | 1 | 1
1968+
b | 1 | 1
1969+
qq | 1 | 1
1970+
qwerti | 1 | 1
1971+
(8 rows)
1972+
1973+
select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentry desc, word;
1974+
word | ndoc | nentry
1975+
-----------+------+--------
1976+
a | 2 | 4
1977+
b | 2 | 4
1978+
copyright | 2 | 2
1979+
foo | 1 | 3
1980+
bar | 1 | 2
1981+
345 | 1 | 1
1982+
qq | 1 | 1
1983+
qwerti | 1 | 1
1984+
(8 rows)
1985+
19361986
select reset_tsearch();
19371987
NOTICE: TSearch cache cleaned
19381988
reset_tsearch
@@ -2092,7 +2142,6 @@ select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
20922142
(5 rows)
20932143

20942144
--check ordering
2095-
drop trigger tsvectorupdate on test_tsvector;
20962145
insert into test_tsvector values (null, null);
20972146
select a is null, a from test_tsvector order by a;
20982147
?column? | a
@@ -2108,6 +2157,8 @@ select a is null, a from test_tsvector order by a;
21082157
f |
21092158
f | '345':1 'qwerti':2 'copyright':3
21102159
f | 'qq':7 'bar':2,8 'foo':1,3,6 'copyright':9
2160+
f | 'a':1A,2,3C 'b':5A,6B,7C,8B
2161+
f | 'a':1A,2,3B 'b':5A,6A,7C,8
21112162
f | '7w' 'ch' 'd7' 'eo' 'gw' 'i4' 'lq' 'o6' 'qt' 'y0'
21122163
f | 'ar' 'ei' 'kq' 'ma' 'qa' 'qh' 'qq' 'qz' 'rx' 'st'
21132164
f | 'gs' 'i6' 'i9' 'j2' 'l0' 'oq' 'qx' 'sc' 'xe' 'yu'
@@ -2609,5 +2660,5 @@ select a is null, a from test_tsvector order by a;
26092660
f | '1b' '42' 'a7' 'ab' 'ak' 'ap' 'at' 'av' 'ay' 'b0' 'b9' 'bb' 'bp' 'bu' 'bz' 'cq' 'da' 'de' 'dn' 'e0' 'eb' 'ef' 'eg' 'ek' 'eq' 'er' 'eu' 'ey' 'fn' 'ft' 'gg' 'h4' 'hk' 'hl' 'i7' 'ig' 'ik' 'ip' 'ir' 'iu' 'iw' 'jr' 'jw' 'jx' 'kg' 'lc' 'lg' 'm0' 'na' 'np' 'om' 'on' 'oz' 'pg' 'pn' 'ps' 'pt' 'pz' 'q3' 'q6' 'qa' 'qb' 'ql' 'qq' 'qt' 'qv' 'qw' 'qy' 'r8' 'rf' 'ri' 'rk' 'rl' 'rw' 'sg' 'si' 'sp' 'sw' 'ta' 'th' 'ua' 'uj' 'uu' 'uv' 'uz' 'vj' 'vk' 'vm' 'wc' 'wf' 'wh' 'wn' 'wo' 'ww' 'xb' 'xk' 'xt' 'xw' 'y7' 'ye' 'yl' 'yt' 'yw' 'z4' 'z7' 'zc' 'zw'
26102661
f | '1h' '3s' 'ab' 'ae' 'ax' 'b1' 'bz' 'cy' 'dk' 'dq' 'ds' 'du' 'e8' 'ef' 'ej' 'ek' 'ex' 'f1' 'fe' 'ff' 'fn' 'fo' 'ft' 'fx' 'ge' 'go' 'gz' 'h6' 'hz' 'i2' 'iv' 'iy' 'j5' 'j6' 'ke' 'kf' 'lh' 'lr' 'mc' 'mj' 'na' 'ng' 'oh' 'om' 'oy' 'p2' 'pi' 'pk' 'py' 'q3' 'qb' 'qc' 'qg' 'qn' 'qo' 'qq' 'qu' 'qw' 'qx' 'qy' 'qz' 'r1' 'rk' 'rl' 'rq' 'rs' 'rt' 'ry' 'rz' 'sk' 'sl' 'so' 't9' 'td' 'te' 'tn' 'tw' 'tz' 'ud' 'uk' 'uo' 'uq' 'uw' 'ux' 'uy' 'v1' 'vg' 'vq' 'w4' 'w9' 'wa' 'wg' 'wj' 'wm' 'wo' 'wr' 'ww' 'wy' 'xf' 'xg' 'y9' 'yh' 'yi' 'yk' 'ym' 'yq' 'yv' 'zm'
26112662
t |
2612-
(512 rows)
2663+
(514 rows)
26132664

‎contrib/tsearch2/sql/tsearch2.sql

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,15 @@ select rank(' a:1 s:2B d g'::tsvector, 'a & s');
150150
select rank(' a:1 s:2 d g'::tsvector,'a & s');
151151

152152
insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
153+
drop trigger tsvectorupdate on test_tsvector;
153154
select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
155+
insert into test_tsvector values ('1', 'a:1a,2,3b b:5a,6a,7c,8');
156+
insert into test_tsvector values ('1', 'a:1a,2,3c b:5a,6b,7c,8b');
157+
select * from stat('select a from test_tsvector','a') order by ndoc desc, nentry desc, word;
158+
select * from stat('select a from test_tsvector','b') order by ndoc desc, nentry desc, word;
159+
select * from stat('select a from test_tsvector','c') order by ndoc desc, nentry desc, word;
160+
select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry desc, word;
161+
select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentry desc, word;
154162

155163
select reset_tsearch();
156164
select to_tsquery('default','skies & books');
@@ -249,7 +257,6 @@ Upon a woman s face. E. J. Pratt (1882 1964)
249257
select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
250258

251259
--check ordering
252-
drop trigger tsvectorupdate on test_tsvector;
253260
insert into test_tsvector values (null, null);
254261
select a is null, a from test_tsvector order by a;
255262

‎contrib/tsearch2/ts_stat.c

Lines changed: 89 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@ Datum
1515
tsstat_in(PG_FUNCTION_ARGS)
1616
{
1717
tsstat*stat=palloc(STATHDRSIZE);
18-
18+
1919
stat->len=STATHDRSIZE;
2020
stat->size=0;
21+
stat->weight=0;
2122
PG_RETURN_POINTER(stat);
2223
}
2324

@@ -32,6 +33,20 @@ tsstat_out(PG_FUNCTION_ARGS)
3233
PG_RETURN_NULL();
3334
}
3435

36+
static int
37+
check_weight(tsvector *txt, WordEntry *wptr, int8 weight) {
38+
int len = POSDATALEN(txt, wptr);
39+
int num = 0;
40+
WordEntryPos *ptr = POSDATAPTR(txt, wptr);
41+
42+
while (len--) {
43+
if (weight & (1 << ptr->weight))
44+
num++;
45+
ptr++;
46+
}
47+
return num;
48+
}
49+
3550
staticWordEntry**
3651
SEI_realloc(WordEntry**in,uint32*len)
3752
{
@@ -83,6 +98,7 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
8398
totallen=CALCSTATSIZE(nentry,slen);
8499
newstat=palloc(totallen);
85100
newstat->len=totallen;
101+
newstat->weight=stat->weight;
86102
newstat->size=nentry;
87103

88104
memcpy(STATSTRPTR(newstat),STATSTRPTR(stat),STATSTRSIZE(stat));
@@ -107,8 +123,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
107123
}
108124
nptr=STATPTR(newstat)+ (StopLow-STATPTR(stat));
109125
memcpy(STATPTR(newstat),STATPTR(stat),sizeof(StatEntry)* (StopLow-STATPTR(stat)));
110-
nptr->nentry=POSDATALEN(txt,*ptr);
111-
if (nptr->nentry==0)
126+
if ( (*ptr)->haspos ) {
127+
nptr->nentry= (stat->weight ) ?check_weight(txt,*ptr,stat->weight) :POSDATALEN(txt,*ptr);
128+
}else
112129
nptr->nentry=1;
113130
nptr->ndoc=1;
114131
nptr->len= (*ptr)->len;
@@ -127,8 +144,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
127144
}
128145
else
129146
{
130-
nptr->nentry=POSDATALEN(txt,*ptr);
131-
if (nptr->nentry==0)
147+
if ( (*ptr)->haspos ) {
148+
nptr->nentry= (stat->weight ) ?check_weight(txt,*ptr,stat->weight) :POSDATALEN(txt,*ptr);
149+
}else
132150
nptr->nentry=1;
133151
nptr->ndoc=1;
134152
nptr->len= (*ptr)->len;
@@ -144,8 +162,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
144162

145163
while (ptr-entry<len)
146164
{
147-
nptr->nentry=POSDATALEN(txt,*ptr);
148-
if (nptr->nentry==0)
165+
if ( (*ptr)->haspos ) {
166+
nptr->nentry= (stat->weight ) ?check_weight(txt,*ptr,stat->weight) :POSDATALEN(txt,*ptr);
167+
}else
149168
nptr->nentry=1;
150169
nptr->ndoc=1;
151170
nptr->len= (*ptr)->len;
@@ -173,12 +192,14 @@ ts_accum(PG_FUNCTION_ARGS)
173192
cur=0;
174193
StatEntry*sptr;
175194
WordEntry*wptr;
195+
intn=0;
176196

177197
if (stat==NULL||PG_ARGISNULL(0))
178198
{/* Init in first */
179199
stat=palloc(STATHDRSIZE);
180200
stat->len=STATHDRSIZE;
181201
stat->size=0;
202+
stat->weight=0;
182203
}
183204

184205
/* simple check of correctness */
@@ -201,32 +222,37 @@ ts_accum(PG_FUNCTION_ARGS)
201222
sptr++;
202223
elseif (cmp==0)
203224
{
204-
intn=POSDATALEN(txt,wptr);
205-
206-
if (n==0)
207-
n=1;
208-
sptr->ndoc++;
209-
sptr->nentry+=n;
225+
if (stat->weight==0 ) {
226+
sptr->ndoc++;
227+
sptr->nentry+= (wptr->haspos) ?POSDATALEN(txt,wptr) :1;
228+
} else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) {
229+
sptr->ndoc++;
230+
sptr->nentry+=n;
231+
}
210232
sptr++;
211233
wptr++;
212234
}
213235
else
214236
{
215-
if (cur==len)
216-
newentry=SEI_realloc(newentry,&len);
217-
newentry[cur]=wptr;
237+
if (stat->weight==0||check_weight(txt,wptr,stat->weight)!=0 ) {
238+
if (cur==len)
239+
newentry=SEI_realloc(newentry,&len);
240+
newentry[cur]=wptr;
241+
cur++;
242+
}
218243
wptr++;
219-
cur++;
220244
}
221245
}
222246

223247
while (wptr-ARRPTR(txt)<txt->size)
224248
{
225-
if (cur==len)
226-
newentry=SEI_realloc(newentry,&len);
227-
newentry[cur]=wptr;
249+
if (stat->weight==0||check_weight(txt,wptr,stat->weight)!=0 ) {
250+
if (cur==len)
251+
newentry=SEI_realloc(newentry,&len);
252+
newentry[cur]=wptr;
253+
cur++;
254+
}
228255
wptr++;
229-
cur++;
230256
}
231257
}
232258
else
@@ -243,12 +269,13 @@ ts_accum(PG_FUNCTION_ARGS)
243269
cmp=compareStatWord(sptr,wptr,stat,txt);
244270
if (cmp==0)
245271
{
246-
intn=POSDATALEN(txt,wptr);
247-
248-
if (n==0)
249-
n=1;
250-
sptr->ndoc++;
251-
sptr->nentry+=n;
272+
if (stat->weight==0 ) {
273+
sptr->ndoc++;
274+
sptr->nentry+= (wptr->haspos) ?POSDATALEN(txt,wptr) :1;
275+
} else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) {
276+
sptr->ndoc++;
277+
sptr->nentry+=n;
278+
}
252279
break;
253280
}
254281
else if (cmp < 0)
@@ -259,10 +286,12 @@ ts_accum(PG_FUNCTION_ARGS)
259286

260287
if (StopLow >=StopHigh)
261288
{/* not found */
262-
if (cur==len)
263-
newentry=SEI_realloc(newentry,&len);
264-
newentry[cur]=wptr;
265-
cur++;
289+
if (stat->weight==0||check_weight(txt,wptr,stat->weight)!=0 ) {
290+
if (cur==len)
291+
newentry=SEI_realloc(newentry,&len);
292+
newentry[cur]=wptr;
293+
cur++;
294+
}
266295
}
267296
wptr++;
268297
}
@@ -389,7 +418,7 @@ get_ti_Oid(void)
389418
}
390419

391420
statictsstat*
392-
ts_stat_sql(text*txt)
421+
ts_stat_sql(text*txt,text*ws)
393422
{
394423
char*query=text2char(txt);
395424
inti;
@@ -423,6 +452,31 @@ ts_stat_sql(text *txt)
423452
stat=palloc(STATHDRSIZE);
424453
stat->len=STATHDRSIZE;
425454
stat->size=0;
455+
stat->weight=0;
456+
457+
if (ws ) {
458+
char*buf;
459+
buf=VARDATA(ws);
460+
while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ) {
461+
switch (tolower(*buf)) {
462+
case'a':
463+
stat->weight |=1 <<3;
464+
break;
465+
case'b':
466+
stat->weight |=1 <<2;
467+
break;
468+
case'c':
469+
stat->weight |=1 <<1;
470+
break;
471+
case'd':
472+
stat->weight |=1;
473+
break;
474+
default:
475+
stat->weight |=0;
476+
}
477+
buf++;
478+
}
479+
}
426480

427481
while (SPI_processed>0)
428482
{
@@ -467,11 +521,13 @@ ts_stat(PG_FUNCTION_ARGS)
467521
{
468522
tsstat*stat;
469523
text*txt=PG_GETARG_TEXT_P(0);
524+
text*ws= (PG_NARGS()>1) ?PG_GETARG_TEXT_P(1) :NULL;
470525

471526
funcctx=SRF_FIRSTCALL_INIT();
472527
SPI_connect();
473-
stat=ts_stat_sql(txt);
528+
stat=ts_stat_sql(txt,ws);
474529
PG_FREE_IF_COPY(txt,0);
530+
if (PG_NARGS()>1 )PG_FREE_IF_COPY(ws,1);
475531
ts_setup_firstcall(funcctx,stat);
476532
SPI_finish();
477533
}

‎contrib/tsearch2/ts_stat.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ typedef struct
2020
{
2121
int4 len;
2222
int4 size;
23+
int4 weight;
2324
char data[1];
2425
} tsstat;
2526

26-
#define STATHDRSIZE (sizeof(int4)*2)
27+
#define STATHDRSIZE (sizeof(int4)*4)
2728
#define CALCSTATSIZE(x,lenstr) ( x * sizeof(StatEntry) + STATHDRSIZE + lenstr )
2829
#define STATPTR(x) ( (StatEntry*) ( (char*)x + STATHDRSIZE ) )
2930
#define STATSTRPTR(x) ( (char*)x + STATHDRSIZE + ( sizeof(StatEntry) * ((tsvector*)x)->size ) )

‎contrib/tsearch2/tsearch.sql.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,12 @@ CREATE FUNCTION stat(text)
652652
language 'C'
653653
with (isstrict);
654654

655+
CREATE FUNCTION stat(text,text)
656+
returns setof statinfo
657+
as 'MODULE_PATHNAME', 'ts_stat'
658+
language 'C'
659+
with (isstrict);
660+
655661
--reset - just for debugging
656662
CREATE FUNCTION reset_tsearch()
657663
returns void

‎contrib/tsearch2/untsearch.sql.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ DROP FUNCTION gtsvector_penalty(internal,internal,internal);
5959
DROP FUNCTION gtsvector_picksplit(internal, internal);
6060
DROP FUNCTION gtsvector_union(internal, internal);
6161
DROP FUNCTION reset_tsearch();
62+
DROP FUNCTION stat(text);
63+
DROP FUNCTION stat(text,text);
6264
DROP FUNCTION tsearch2() CASCADE;
6365
DROP FUNCTION _get_parser_from_curcfg();
6466

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp