Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c52795d

Browse files
committed
Text parser rewritten:
- supports multibyte encodings
- stricter rules for lexemes
- flex isn't used

Add:
- tsquery plainto_tsquery(text): function that makes a tsquery from plain text.
- &&, ||, !! operations for tsquery, for combining a tsquery from its parts: 'foo & bar' || 'asd' => 'foo & bar | asd'
1 parent b91e6ed, commit c52795d

File tree

15 files changed

+1613
-424
lines changed

15 files changed

+1613
-424
lines changed

‎contrib/tsearch2/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.11 2005/11/08 17:08:46 teodor Exp $
1+
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $
22

33
MODULE_big = tsearch2
44
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o\
55
dict_snowball.o dict_ispell.o dict_syn.o\
66
wparser.o wparser_def.o\
77
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o\
88
tsvector_op.o rank.o ts_stat.o\
9-
query_util.o query_support.o query_rewrite.o query_gist.o
9+
query_util.o query_support.o query_rewrite.o query_gist.o\
10+
ts_locale.o
1011

1112
SUBDIRS := snowball ispell wordparser
1213
SUBDIROBJS :=$(SUBDIRS:%=%/SUBSYS.o)

‎contrib/tsearch2/expected/tsearch2.out

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ psql:tsearch2.sql:342: NOTICE: argument type tsvector is only a shell
1313
psql:tsearch2.sql:396: NOTICE: type "tsquery" is not yet defined
1414
DETAIL: Creating a shell type definition.
1515
psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell
16-
psql:tsearch2.sql:544: NOTICE: type "gtsvector" is not yet defined
16+
psql:tsearch2.sql:559: NOTICE: type "gtsvector" is not yet defined
1717
DETAIL: Creating a shell type definition.
18-
psql:tsearch2.sql:549: NOTICE: argument type gtsvector is only a shell
19-
psql:tsearch2.sql:998: NOTICE: type "gtsq" is not yet defined
18+
psql:tsearch2.sql:564: NOTICE: argument type gtsvector is only a shell
19+
psql:tsearch2.sql:1054: NOTICE: type "gtsq" is not yet defined
2020
DETAIL: Creating a shell type definition.
21-
psql:tsearch2.sql:1003: NOTICE: argument type gtsq is only a shell
21+
psql:tsearch2.sql:1059: NOTICE: argument type gtsq is only a shell
2222
--tsvector
2323
SELECT '1'::tsvector;
2424
tsvector
@@ -653,7 +653,7 @@ select * from token_type('default');
653653
11 | lpart_hword | Latin part of hyphenated word
654654
12 | blank | Space symbols
655655
13 | tag | HTML Tag
656-
14 |http | HTTP head
656+
14 |protocol| Protocol head
657657
15 | hword | Hyphenated word
658658
16 | lhword | Latin hyphenated word
659659
17 | nlhword | Non-latin hyphenated word
@@ -672,14 +672,13 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
672672
-------+--------------------------------------
673673
22 | 345
674674
12 |
675-
4 | qwe@efd.r
676-
12 |
677-
12 |'
678-
12 |
675+
1 | qwe
676+
12 |@
677+
19 |efd.r
678+
12 |'
679679
14 | http://
680680
6 | www.com
681-
12 | /
682-
12 |
681+
12 | /
683682
14 | http://
684683
5 | aew.werc.ewr/?ad=qwe&dw
685684
6 | aew.werc.ewr
@@ -700,23 +699,21 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
700699
6 | 4aew.werc.ewr
701700
12 |
702701
14 | http://
703-
5 | 5aew.werc.ewr:8100/?
704-
6 | 5aew.werc.ewr
705-
18 | :8100/?
706-
12 |
702+
6 | 5aew.werc.ewr:8100
703+
12 | /?
707704
1 | ad
708705
12 | =
709706
1 | qwe
710707
12 | &
711708
1 | dw
712709
12 |
713710
5 | 6aew.werc.ewr:8100/?ad=qwe&dw
714-
6 | 6aew.werc.ewr
715-
18 |:8100/?ad=qwe&dw
711+
6 | 6aew.werc.ewr:8100
712+
18 | /?ad=qwe&dw
716713
12 |
717714
5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
718-
6 | 7aew.werc.ewr
719-
18 |:8100/?ad=qwe&dw=%20%32
715+
6 | 7aew.werc.ewr:8100
716+
18 | /?ad=qwe&dw=%20%32
720717
12 |
721718
7 | +4.0e-10
722719
12 |
@@ -747,11 +744,15 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
747744
1 | jf
748745
12 |
749746
1 | sdjk
750-
13 | <we hjwer <werrwe>
747+
12 | <
748+
1 | we
751749
12 |
752-
3 | ewr1
753-
12 | >
750+
1 | hjwer
751+
12 |
752+
13 | <werrwe>
754753
12 |
754+
3 | ewr1
755+
12 | >
755756
3 | ewri2
756757
12 |
757758
13 | <a href="qwe<qwe>">
@@ -767,57 +768,53 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
767768
12 |
768769
19 | /wqe-324/ewr
769770
12 |
770-
6 | gist.h
771-
12 |
772-
6 | gist.h.c
771+
19 | gist.h
773772
12 |
774-
6 | gist.c
775-
12 | .
773+
19 | gist.h.c
776774
12 |
775+
19 | gist.c
776+
12 | .
777777
1 | readline
778778
12 |
779779
20 | 4.2
780780
12 |
781781
20 | 4.2
782-
12 | .
783-
12 |
782+
12 | .
784783
20 | 4.2
785-
12 | ,
786-
12 |
787-
15 | readline-4
784+
12 | ,
785+
15 | readline-4.2
788786
11 | readline
789787
12 | -
790788
20 | 4.2
791789
12 |
792-
15 | readline-4
790+
15 | readline-4.2
793791
11 | readline
794792
12 | -
795793
20 | 4.2
796-
12 | .
797-
12 |
794+
12 | .
798795
22 | 234
799796
12 |
800797

801-
13 | <i <b>
798+
12 | <
799+
1 | i
800+
12 |
801+
13 | <b>
802802
12 |
803803
1 | wow
804804
12 |
805-
12 | <
806-
12 |
805+
12 | <
807806
1 | jqw
808807
12 |
809-
12 | <
810-
12 | >
811-
12 |
808+
12 | <>
812809
1 | qwerty
813-
(138 rows)
810+
(135 rows)
814811

815812
SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
816813
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
817814
<i <b> wow < jqw <> qwerty');
818-
to_tsvector
819-
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
820-
'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
815+
to_tsvector
816+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
817+
'ad':17 'dw':19 'jf':39 '234':63 '345':1 '4.2':54,55,56,59,62 '455':31 'jqw':66 'qwe':2,18,27,28,35 'wer':36 'wow':65 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':67 '234.435':30 'qwe-wer':34 'readlin':53,58,61 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 'readline-4.2':57,60 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
821818
(1 row)
822819

823820
SELECT length(to_tsvector('default', '345 qw'));
@@ -831,7 +828,7 @@ SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://ae
831828
<i <b> wow < jqw <> qwerty'));
832829
length
833830
--------
834-
53
831+
51
835832
(1 row)
836833

837834
select to_tsquery('default', 'qwe & sKies ');
@@ -876,6 +873,36 @@ select to_tsquery('default', '(the|and&(i&1))&fghj');
876873
'1' & 'fghj'
877874
(1 row)
878875

876+
select plainto_tsquery('default', 'the and z 1))& fghj');
877+
plainto_tsquery
878+
--------------------
879+
'z' & '1' & 'fghj'
880+
(1 row)
881+
882+
select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
883+
?column?
884+
-----------------------
885+
'foo' & 'bar' & 'asd'
886+
(1 row)
887+
888+
select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
889+
?column?
890+
------------------------------
891+
'foo' & 'bar' | 'asd' & 'fg'
892+
(1 row)
893+
894+
select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
895+
?column?
896+
-----------------------------------
897+
'foo' & 'bar' | !( 'asd' & 'fg' )
898+
(1 row)
899+
900+
select plainto_tsquery('default', 'foo bar') && 'asd | fg';
901+
?column?
902+
----------------------------------
903+
'foo' & 'bar' & ( 'asd' | 'fg' )
904+
(1 row)
905+
879906
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
880907
?column?
881908
----------

‎contrib/tsearch2/query.c

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,20 @@ Datumto_tsquery_name(PG_FUNCTION_ARGS);
5151
PG_FUNCTION_INFO_V1(to_tsquery_current);
5252
Datumto_tsquery_current(PG_FUNCTION_ARGS);
5353

54+
PG_FUNCTION_INFO_V1(plainto_tsquery);
55+
Datumplainto_tsquery(PG_FUNCTION_ARGS);
56+
57+
PG_FUNCTION_INFO_V1(plainto_tsquery_name);
58+
Datumplainto_tsquery_name(PG_FUNCTION_ARGS);
59+
60+
PG_FUNCTION_INFO_V1(plainto_tsquery_current);
61+
Datumplainto_tsquery_current(PG_FUNCTION_ARGS);
62+
5463
/* parser's states */
5564
#defineWAITOPERAND 1
5665
#defineWAITOPERATOR2
5766
#defineWAITFIRSTOPERAND 3
67+
#defineWAITSINGLEOPERAND 4
5868

5969
/*
6070
* node of query tree, also used
@@ -195,6 +205,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
195205
elseif (*(state->buf)!=' ')
196206
returnERR;
197207
break;
208+
caseWAITSINGLEOPERAND:
209+
if (*(state->buf)=='\0' )
210+
returnEND;
211+
*strval=state->buf;
212+
*lenval=strlen(state->buf );
213+
state->buf+=strlen(state->buf );
214+
state->count++;
215+
returnVAL;
198216
default:
199217
returnERR;
200218
break;
@@ -582,7 +600,7 @@ findoprnd(ITEM * ptr, int4 *pos)
582600
* input
583601
*/
584602
staticQUERYTYPE*
585-
queryin(char*buf,void (*pushval) (QPRS_STATE*,int,char*,int,int2),intcfg_id)
603+
queryin(char*buf,void (*pushval) (QPRS_STATE*,int,char*,int,int2),intcfg_id,boolisplain)
586604
{
587605
QPRS_STATEstate;
588606
int4i;
@@ -599,7 +617,7 @@ static QUERYTYPE *
599617

600618
/* init state */
601619
state.buf=buf;
602-
state.state=WAITFIRSTOPERAND;
620+
state.state=(isplain) ?WAITSINGLEOPERAND :WAITFIRSTOPERAND;
603621
state.count=0;
604622
state.num=0;
605623
state.str=NULL;
@@ -679,7 +697,7 @@ Datum
679697
tsquery_in(PG_FUNCTION_ARGS)
680698
{
681699
SET_FUNCOID();
682-
PG_RETURN_POINTER(queryin((char*)PG_GETARG_POINTER(0),pushval_asis,0));
700+
PG_RETURN_POINTER(queryin((char*)PG_GETARG_POINTER(0),pushval_asis,0, false));
683701
}
684702

685703
/*
@@ -910,7 +928,7 @@ to_tsquery(PG_FUNCTION_ARGS)
910928
str=text2char(in);
911929
PG_FREE_IF_COPY(in,1);
912930

913-
query=queryin(str,pushval_morph,PG_GETARG_INT32(0));
931+
query=queryin(str,pushval_morph,PG_GETARG_INT32(0),false);
914932

915933
if (query->size==0 )
916934
PG_RETURN_POINTER(query);
@@ -950,3 +968,59 @@ to_tsquery_current(PG_FUNCTION_ARGS)
950968
Int32GetDatum(get_currcfg()),
951969
PG_GETARG_DATUM(0)));
952970
}
971+
972+
Datum
973+
plainto_tsquery(PG_FUNCTION_ARGS)
974+
{
975+
text*in=PG_GETARG_TEXT_P(1);
976+
char*str;
977+
QUERYTYPE*query;
978+
ITEM*res;
979+
int4len;
980+
981+
SET_FUNCOID();
982+
983+
str=text2char(in);
984+
PG_FREE_IF_COPY(in,1);
985+
986+
query=queryin(str,pushval_morph,PG_GETARG_INT32(0), true);
987+
988+
if (query->size==0 )
989+
PG_RETURN_POINTER(query);
990+
991+
res=clean_fakeval_v2(GETQUERY(query),&len);
992+
if (!res)
993+
{
994+
query->len=HDRSIZEQT;
995+
query->size=0;
996+
PG_RETURN_POINTER(query);
997+
}
998+
memcpy((void*)GETQUERY(query), (void*)res,len*sizeof(ITEM));
999+
pfree(res);
1000+
PG_RETURN_POINTER(query);
1001+
}
1002+
1003+
Datum
1004+
plainto_tsquery_name(PG_FUNCTION_ARGS)
1005+
{
1006+
text*name=PG_GETARG_TEXT_P(0);
1007+
Datumres;
1008+
1009+
SET_FUNCOID();
1010+
res=DirectFunctionCall2(plainto_tsquery,
1011+
Int32GetDatum(name2id_cfg(name)),
1012+
PG_GETARG_DATUM(1));
1013+
1014+
PG_FREE_IF_COPY(name,0);
1015+
PG_RETURN_DATUM(res);
1016+
}
1017+
1018+
Datum
1019+
plainto_tsquery_current(PG_FUNCTION_ARGS)
1020+
{
1021+
SET_FUNCOID();
1022+
PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery,
1023+
Int32GetDatum(get_currcfg()),
1024+
PG_GETARG_DATUM(0)));
1025+
}
1026+

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp