Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1157f3c

Browse files
committed
Change descriptions of entity and tag objects to "XML entity" and "XML tag".
Allow tag and entity names that follow XML rules. Provide for hexadecimal as well as decimal numeric entities. Adjust code names to coincide with new descriptions.
1 parent a262394, commit 1157f3c

File tree

3 files changed

+63
-38
lines changed

3 files changed

+63
-38
lines changed

‎doc/src/sgml/textsearch.sgml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.36 2007/11/16 03:23:07 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.37 2007/11/20 02:25:22 adunstan Exp $ -->
22

33
<chapter id="textsearch">
44
<title id="textsearch-title">Full Text Search</title>
@@ -1862,12 +1862,12 @@ LIMIT 10;
18621862
</row>
18631863
<row>
18641864
<entry><literal>tag</></entry>
1865-
<entry>HTML tag</entry>
1866-
<entry><literal>&lt;A HREF="dictionaries.html"&gt;</literal></entry>
1865+
<entry>XML tag</entry>
1866+
<entry><literal>&lt;a href="dictionaries.html"&gt;</literal></entry>
18671867
</row>
18681868
<row>
18691869
<entry><literal>entity</></entry>
1870-
<entry>HTML entity</entry>
1870+
<entry>XML entity</entry>
18711871
<entry><literal>&amp;amp;</literal></entry>
18721872
</row>
18731873
<row>

‎src/backend/tsearch/wparser_def.c

Lines changed: 57 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.10 2007/11/15 22:25:16 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.11 2007/11/20 02:25:22 adunstan Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -50,7 +50,7 @@
5050
#defineDECIMAL20
5151
#defineSIGNEDINT21
5252
#defineUNSIGNEDINT22
53-
#defineHTMLENTITY23
53+
#defineXMLENTITY23
5454

5555
#defineLASTNUM23
5656

@@ -95,7 +95,7 @@ static const char *const lex_descr[] = {
9595
"Hyphenated word part, all letters",
9696
"Hyphenated word part, all ASCII",
9797
"Space symbols",
98-
"HTML tag",
98+
"XML tag",
9999
"Protocol head",
100100
"Hyphenated word, letters and digits",
101101
"Hyphenated word, all ASCII",
@@ -105,7 +105,7 @@ static const char *const lex_descr[] = {
105105
"Decimal notation",
106106
"Signed integer",
107107
"Unsigned integer",
108-
"HTML entity"
108+
"XML entity"
109109
};
110110

111111

@@ -132,11 +132,13 @@ typedef enum
132132
TPS_InMantissaFirst,
133133
TPS_InMantissaSign,
134134
TPS_InMantissa,
135-
TPS_InHTMLEntityFirst,
136-
TPS_InHTMLEntity,
137-
TPS_InHTMLEntityNumFirst,
138-
TPS_InHTMLEntityNum,
139-
TPS_InHTMLEntityEnd,
135+
TPS_InXMLEntityFirst,
136+
TPS_InXMLEntity,
137+
TPS_InXMLEntityNumFirst,
138+
TPS_InXMLEntityNum,
139+
TPS_InXMLEntityHexNumFirst,
140+
TPS_InXMLEntityHexNum,
141+
TPS_InXMLEntityEnd,
140142
TPS_InTagFirst,
141143
TPS_InXMLBegin,
142144
TPS_InTagCloseFirst,
@@ -653,7 +655,7 @@ static const TParserStateActionItem actionTPS_Base[] = {
653655
{p_isdigit,0,A_NEXT,TPS_InUnsignedInt,0,NULL},
654656
{p_iseqC,'-',A_PUSH,TPS_InSignedIntFirst,0,NULL},
655657
{p_iseqC,'+',A_PUSH,TPS_InSignedIntFirst,0,NULL},
656-
{p_iseqC,'&',A_PUSH,TPS_InHTMLEntityFirst,0,NULL},
658+
{p_iseqC,'&',A_PUSH,TPS_InXMLEntityFirst,0,NULL},
657659
{p_iseqC,'~',A_PUSH,TPS_InFileTwiddle,0,NULL},
658660
{p_iseqC,'/',A_PUSH,TPS_InFileFirst,0,NULL},
659661
{p_iseqC,'.',A_PUSH,TPS_InPathFirstFirst,0,NULL},
@@ -811,35 +813,56 @@ static const TParserStateActionItem actionTPS_InMantissa[] = {
811813
{NULL,0,A_BINGO,TPS_Base,SCIENTIFIC,NULL}
812814
};
813815

814-
staticconstTParserStateActionItemactionTPS_InHTMLEntityFirst[]= {
816+
staticconstTParserStateActionItemactionTPS_InXMLEntityFirst[]= {
815817
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
816-
{p_iseqC,'#',A_NEXT,TPS_InHTMLEntityNumFirst,0,NULL},
817-
{p_isasclet,0,A_NEXT,TPS_InHTMLEntity,0,NULL},
818+
{p_iseqC,'#',A_NEXT,TPS_InXMLEntityNumFirst,0,NULL},
819+
{p_isasclet,0,A_NEXT,TPS_InXMLEntity,0,NULL},
820+
{p_iseqC,':',A_NEXT,TPS_InXMLEntity,0,NULL},
821+
{p_iseqC,'_',A_NEXT,TPS_InXMLEntity,0,NULL},
818822
{NULL,0,A_POP,TPS_Null,0,NULL}
819823
};
820824

821-
staticconstTParserStateActionItemactionTPS_InHTMLEntity[]= {
825+
staticconstTParserStateActionItemactionTPS_InXMLEntity[]= {
822826
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
823-
{p_isasclet,0,A_NEXT,TPS_InHTMLEntity,0,NULL},
824-
{p_iseqC,';',A_NEXT,TPS_InHTMLEntityEnd,0,NULL},
827+
{p_isalnum,0,A_NEXT,TPS_InXMLEntity,0,NULL},
828+
{p_iseqC,':',A_NEXT,TPS_InXMLEntity,0,NULL},
829+
{p_iseqC,'_',A_NEXT,TPS_InXMLEntity,0,NULL},
830+
{p_iseqC,':',A_NEXT,TPS_InXMLEntity,0,NULL},
831+
{p_iseqC,'.',A_NEXT,TPS_InXMLEntity,0,NULL},
832+
{p_iseqC,'-',A_NEXT,TPS_InXMLEntity,0,NULL},
833+
{p_iseqC,';',A_NEXT,TPS_InXMLEntityEnd,0,NULL},
825834
{NULL,0,A_POP,TPS_Null,0,NULL}
826835
};
827836

828-
staticconstTParserStateActionItemactionTPS_InHTMLEntityNumFirst[]= {
837+
staticconstTParserStateActionItemactionTPS_InXMLEntityNumFirst[]= {
829838
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
830-
{p_isdigit,0,A_NEXT,TPS_InHTMLEntityNum,0,NULL},
839+
{p_iseqC,'x',A_NEXT,TPS_InXMLEntityHexNumFirst,0,NULL},
840+
{p_isdigit,0,A_NEXT,TPS_InXMLEntityNum,0,NULL},
831841
{NULL,0,A_POP,TPS_Null,0,NULL}
832842
};
833843

834-
staticconstTParserStateActionItemactionTPS_InHTMLEntityNum[]= {
844+
staticconstTParserStateActionItemactionTPS_InXMLEntityHexNumFirst[]= {
835845
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
836-
{p_isdigit,0,A_NEXT,TPS_InHTMLEntityNum,0,NULL},
837-
{p_iseqC,';',A_NEXT,TPS_InHTMLEntityEnd,0,NULL},
846+
{p_isxdigit,0,A_NEXT,TPS_InXMLEntityHexNum,0,NULL},
838847
{NULL,0,A_POP,TPS_Null,0,NULL}
839848
};
840849

841-
staticconstTParserStateActionItemactionTPS_InHTMLEntityEnd[]= {
842-
{NULL,0,A_BINGO |A_CLEAR,TPS_Base,HTMLENTITY,NULL}
850+
staticconstTParserStateActionItemactionTPS_InXMLEntityNum[]= {
851+
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
852+
{p_isdigit,0,A_NEXT,TPS_InXMLEntityNum,0,NULL},
853+
{p_iseqC,';',A_NEXT,TPS_InXMLEntityEnd,0,NULL},
854+
{NULL,0,A_POP,TPS_Null,0,NULL}
855+
};
856+
857+
staticconstTParserStateActionItemactionTPS_InXMLEntityHexNum[]= {
858+
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
859+
{p_isxdigit,0,A_NEXT,TPS_InXMLEntityHexNum,0,NULL},
860+
{p_iseqC,';',A_NEXT,TPS_InXMLEntityEnd,0,NULL},
861+
{NULL,0,A_POP,TPS_Null,0,NULL}
862+
};
863+
864+
staticconstTParserStateActionItemactionTPS_InXMLEntityEnd[]= {
865+
{NULL,0,A_BINGO |A_CLEAR,TPS_Base,XMLENTITY,NULL}
843866
};
844867

845868
staticconstTParserStateActionItemactionTPS_InTagFirst[]= {
@@ -854,8 +877,8 @@ static const TParserStateActionItem actionTPS_InTagFirst[] = {
854877
staticconstTParserStateActionItemactionTPS_InXMLBegin[]= {
855878
{p_isEOF,0,A_POP,TPS_Null,0,NULL},
856879
/* <?xml ... */
880+
/* XXX do we wants states for the m and l ? Right now this accepts <?xZ */
857881
{p_iseqC,'x',A_NEXT,TPS_InTag,0,NULL},
858-
{p_iseqC,'X',A_NEXT,TPS_InTag,0,NULL},
859882
{NULL,0,A_POP,TPS_Null,0,NULL}
860883
};
861884

@@ -1278,11 +1301,13 @@ static const TParserStateAction Actions[] = {
12781301
TPARSERSTATEACTION(TPS_InMantissaFirst),
12791302
TPARSERSTATEACTION(TPS_InMantissaSign),
12801303
TPARSERSTATEACTION(TPS_InMantissa),
1281-
TPARSERSTATEACTION(TPS_InHTMLEntityFirst),
1282-
TPARSERSTATEACTION(TPS_InHTMLEntity),
1283-
TPARSERSTATEACTION(TPS_InHTMLEntityNumFirst),
1284-
TPARSERSTATEACTION(TPS_InHTMLEntityNum),
1285-
TPARSERSTATEACTION(TPS_InHTMLEntityEnd),
1304+
TPARSERSTATEACTION(TPS_InXMLEntityFirst),
1305+
TPARSERSTATEACTION(TPS_InXMLEntity),
1306+
TPARSERSTATEACTION(TPS_InXMLEntityNumFirst),
1307+
TPARSERSTATEACTION(TPS_InXMLEntityNum),
1308+
TPARSERSTATEACTION(TPS_InXMLEntityHexNumFirst),
1309+
TPARSERSTATEACTION(TPS_InXMLEntityHexNum),
1310+
TPARSERSTATEACTION(TPS_InXMLEntityEnd),
12861311
TPARSERSTATEACTION(TPS_InTagFirst),
12871312
TPARSERSTATEACTION(TPS_InXMLBegin),
12881313
TPARSERSTATEACTION(TPS_InTagCloseFirst),
@@ -1556,9 +1581,9 @@ prsd_end(PG_FUNCTION_ARGS)
15561581
#defineCOMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
15571582
#defineENDPUNCTOKEN(x) ( (x)==SPACE )
15581583

1559-
#defineTS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==HTMLENTITY )
1584+
#defineTS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
15601585
#defineHLIDIGNORE(x) ( (x)==URL_T || (x)==TAG_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1561-
#defineHTMLHLIDIGNORE(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1586+
#defineXMLHLIDIGNORE(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
15621587
#defineNONWORDTOKEN(x) ( (x)==SPACE || HLIDIGNORE(x) )
15631588
#defineNOENDTOKEN(x)( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
15641589

@@ -1839,7 +1864,7 @@ prsd_headline(PG_FUNCTION_ARGS)
18391864
}
18401865
else
18411866
{
1842-
if (HTMLHLIDIGNORE(prs->words[i].type))
1867+
if (XMLHLIDIGNORE(prs->words[i].type))
18431868
prs->words[i].replace=1;
18441869
}
18451870

‎src/test/regress/expected/tsearch.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ SELECT * FROM ts_token_type('default');
222222
10 | hword_part | Hyphenated word part, all letters
223223
11 | hword_asciipart | Hyphenated word part, all ASCII
224224
12 | blank | Space symbols
225-
13 | tag |HTML tag
225+
13 | tag |XML tag
226226
14 | protocol | Protocol head
227227
15 | numhword | Hyphenated word, letters and digits
228228
16 | asciihword | Hyphenated word, all ASCII
@@ -232,7 +232,7 @@ SELECT * FROM ts_token_type('default');
232232
20 | float | Decimal notation
233233
21 | int | Signed integer
234234
22 | uint | Unsigned integer
235-
23 | entity |HTML entity
235+
23 | entity |XML entity
236236
(23 rows)
237237

238238
SELECT * FROM ts_parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp