7
7
*
8
8
*
9
9
* IDENTIFICATION
10
- * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.10 2007/11/15 22:25:16 momjian Exp $
10
+ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.11 2007/11/20 02:25:22 adunstan Exp $
11
11
*
12
12
*-------------------------------------------------------------------------
13
13
*/
50
50
#define DECIMAL 20
51
51
#define SIGNEDINT 21
52
52
#define UNSIGNEDINT 22
53
- #define HTMLENTITY 23
53
+ #define XMLENTITY 23
54
54
55
55
#define LASTNUM 23
56
56
@@ -95,7 +95,7 @@ static const char *const lex_descr[] = {
95
95
"Hyphenated word part, all letters" ,
96
96
"Hyphenated word part, all ASCII" ,
97
97
"Space symbols" ,
98
- "HTML tag" ,
98
+ "XML tag" ,
99
99
"Protocol head" ,
100
100
"Hyphenated word, letters and digits" ,
101
101
"Hyphenated word, all ASCII" ,
@@ -105,7 +105,7 @@ static const char *const lex_descr[] = {
105
105
"Decimal notation" ,
106
106
"Signed integer" ,
107
107
"Unsigned integer" ,
108
- "HTML entity"
108
+ "XML entity"
109
109
};
110
110
111
111
@@ -132,11 +132,13 @@ typedef enum
132
132
TPS_InMantissaFirst ,
133
133
TPS_InMantissaSign ,
134
134
TPS_InMantissa ,
135
- TPS_InHTMLEntityFirst ,
136
- TPS_InHTMLEntity ,
137
- TPS_InHTMLEntityNumFirst ,
138
- TPS_InHTMLEntityNum ,
139
- TPS_InHTMLEntityEnd ,
135
+ TPS_InXMLEntityFirst ,
136
+ TPS_InXMLEntity ,
137
+ TPS_InXMLEntityNumFirst ,
138
+ TPS_InXMLEntityNum ,
139
+ TPS_InXMLEntityHexNumFirst ,
140
+ TPS_InXMLEntityHexNum ,
141
+ TPS_InXMLEntityEnd ,
140
142
TPS_InTagFirst ,
141
143
TPS_InXMLBegin ,
142
144
TPS_InTagCloseFirst ,
@@ -653,7 +655,7 @@ static const TParserStateActionItem actionTPS_Base[] = {
653
655
{p_isdigit ,0 ,A_NEXT ,TPS_InUnsignedInt ,0 ,NULL },
654
656
{p_iseqC ,'-' ,A_PUSH ,TPS_InSignedIntFirst ,0 ,NULL },
655
657
{p_iseqC ,'+' ,A_PUSH ,TPS_InSignedIntFirst ,0 ,NULL },
656
- {p_iseqC ,'&' ,A_PUSH ,TPS_InHTMLEntityFirst ,0 ,NULL },
658
+ {p_iseqC ,'&' ,A_PUSH ,TPS_InXMLEntityFirst ,0 ,NULL },
657
659
{p_iseqC ,'~' ,A_PUSH ,TPS_InFileTwiddle ,0 ,NULL },
658
660
{p_iseqC ,'/' ,A_PUSH ,TPS_InFileFirst ,0 ,NULL },
659
661
{p_iseqC ,'.' ,A_PUSH ,TPS_InPathFirstFirst ,0 ,NULL },
@@ -811,35 +813,56 @@ static const TParserStateActionItem actionTPS_InMantissa[] = {
811
813
{NULL ,0 ,A_BINGO ,TPS_Base ,SCIENTIFIC ,NULL }
812
814
};
813
815
814
- static const TParserStateActionItem actionTPS_InHTMLEntityFirst []= {
816
+ static const TParserStateActionItem actionTPS_InXMLEntityFirst []= {
815
817
{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
816
- {p_iseqC ,'#' ,A_NEXT ,TPS_InHTMLEntityNumFirst ,0 ,NULL },
817
- {p_isasclet ,0 ,A_NEXT ,TPS_InHTMLEntity ,0 ,NULL },
818
+ {p_iseqC ,'#' ,A_NEXT ,TPS_InXMLEntityNumFirst ,0 ,NULL },
819
+ {p_isasclet ,0 ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
820
+ {p_iseqC ,':' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
821
+ {p_iseqC ,'_' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
818
822
{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
819
823
};
820
824
821
- static const TParserStateActionItem actionTPS_InHTMLEntity []= {
825
+ static const TParserStateActionItem actionTPS_InXMLEntity []= {
822
826
{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
823
- {p_isasclet ,0 ,A_NEXT ,TPS_InHTMLEntity ,0 ,NULL },
824
- {p_iseqC ,';' ,A_NEXT ,TPS_InHTMLEntityEnd ,0 ,NULL },
827
+ {p_isalnum ,0 ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
828
+ {p_iseqC ,':' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
829
+ {p_iseqC ,'_' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
830
+ {p_iseqC ,':' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
831
+ {p_iseqC ,'.' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
832
+ {p_iseqC ,'-' ,A_NEXT ,TPS_InXMLEntity ,0 ,NULL },
833
+ {p_iseqC ,';' ,A_NEXT ,TPS_InXMLEntityEnd ,0 ,NULL },
825
834
{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
826
835
};
827
836
828
- static const TParserStateActionItem actionTPS_InHTMLEntityNumFirst []= {
837
+ static const TParserStateActionItem actionTPS_InXMLEntityNumFirst []= {
829
838
{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
830
- {p_isdigit ,0 ,A_NEXT ,TPS_InHTMLEntityNum ,0 ,NULL },
839
+ {p_iseqC ,'x' ,A_NEXT ,TPS_InXMLEntityHexNumFirst ,0 ,NULL },
840
+ {p_isdigit ,0 ,A_NEXT ,TPS_InXMLEntityNum ,0 ,NULL },
831
841
{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
832
842
};
833
843
834
- static const TParserStateActionItem actionTPS_InHTMLEntityNum []= {
844
+ static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst []= {
835
845
{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
836
- {p_isdigit ,0 ,A_NEXT ,TPS_InHTMLEntityNum ,0 ,NULL },
837
- {p_iseqC ,';' ,A_NEXT ,TPS_InHTMLEntityEnd ,0 ,NULL },
846
+ {p_isxdigit ,0 ,A_NEXT ,TPS_InXMLEntityHexNum ,0 ,NULL },
838
847
{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
839
848
};
840
849
841
- static const TParserStateActionItem actionTPS_InHTMLEntityEnd []= {
842
- {NULL ,0 ,A_BINGO |A_CLEAR ,TPS_Base ,HTMLENTITY ,NULL }
850
+ static const TParserStateActionItem actionTPS_InXMLEntityNum []= {
851
+ {p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
852
+ {p_isdigit ,0 ,A_NEXT ,TPS_InXMLEntityNum ,0 ,NULL },
853
+ {p_iseqC ,';' ,A_NEXT ,TPS_InXMLEntityEnd ,0 ,NULL },
854
+ {NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
855
+ };
856
+
857
+ static const TParserStateActionItem actionTPS_InXMLEntityHexNum []= {
858
+ {p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
859
+ {p_isxdigit ,0 ,A_NEXT ,TPS_InXMLEntityHexNum ,0 ,NULL },
860
+ {p_iseqC ,';' ,A_NEXT ,TPS_InXMLEntityEnd ,0 ,NULL },
861
+ {NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
862
+ };
863
+
864
+ static const TParserStateActionItem actionTPS_InXMLEntityEnd []= {
865
+ {NULL ,0 ,A_BINGO |A_CLEAR ,TPS_Base ,XMLENTITY ,NULL }
843
866
};
844
867
845
868
static const TParserStateActionItem actionTPS_InTagFirst []= {
@@ -854,8 +877,8 @@ static const TParserStateActionItem actionTPS_InTagFirst[] = {
854
877
static const TParserStateActionItem actionTPS_InXMLBegin []= {
855
878
{p_isEOF ,0 ,A_POP ,TPS_Null ,0 ,NULL },
856
879
/* <?xml ... */
880
+ /* XXX do we want states for the m and l? Right now this accepts <?xZ */
857
881
{p_iseqC ,'x' ,A_NEXT ,TPS_InTag ,0 ,NULL },
858
- {p_iseqC ,'X' ,A_NEXT ,TPS_InTag ,0 ,NULL },
859
882
{NULL ,0 ,A_POP ,TPS_Null ,0 ,NULL }
860
883
};
861
884
@@ -1278,11 +1301,13 @@ static const TParserStateAction Actions[] = {
1278
1301
TPARSERSTATEACTION (TPS_InMantissaFirst ),
1279
1302
TPARSERSTATEACTION (TPS_InMantissaSign ),
1280
1303
TPARSERSTATEACTION (TPS_InMantissa ),
1281
- TPARSERSTATEACTION (TPS_InHTMLEntityFirst ),
1282
- TPARSERSTATEACTION (TPS_InHTMLEntity ),
1283
- TPARSERSTATEACTION (TPS_InHTMLEntityNumFirst ),
1284
- TPARSERSTATEACTION (TPS_InHTMLEntityNum ),
1285
- TPARSERSTATEACTION (TPS_InHTMLEntityEnd ),
1304
+ TPARSERSTATEACTION (TPS_InXMLEntityFirst ),
1305
+ TPARSERSTATEACTION (TPS_InXMLEntity ),
1306
+ TPARSERSTATEACTION (TPS_InXMLEntityNumFirst ),
1307
+ TPARSERSTATEACTION (TPS_InXMLEntityNum ),
1308
+ TPARSERSTATEACTION (TPS_InXMLEntityHexNumFirst ),
1309
+ TPARSERSTATEACTION (TPS_InXMLEntityHexNum ),
1310
+ TPARSERSTATEACTION (TPS_InXMLEntityEnd ),
1286
1311
TPARSERSTATEACTION (TPS_InTagFirst ),
1287
1312
TPARSERSTATEACTION (TPS_InXMLBegin ),
1288
1313
TPARSERSTATEACTION (TPS_InTagCloseFirst ),
@@ -1556,9 +1581,9 @@ prsd_end(PG_FUNCTION_ARGS)
1556
1581
#define COMPLEXTOKEN (x ) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1557
1582
#define ENDPUNCTOKEN (x ) ( (x)==SPACE )
1558
1583
1559
- #define TS_IDIGNORE (x ) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==HTMLENTITY )
1584
+ #define TS_IDIGNORE (x ) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
1560
1585
#define HLIDIGNORE (x ) ( (x)==URL_T || (x)==TAG_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1561
- #define HTMLHLIDIGNORE (x ) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1586
+ #define XMLHLIDIGNORE (x ) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
1562
1587
#define NONWORDTOKEN (x ) ( (x)==SPACE || HLIDIGNORE(x) )
1563
1588
#define NOENDTOKEN (x )( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
1564
1589
@@ -1839,7 +1864,7 @@ prsd_headline(PG_FUNCTION_ARGS)
1839
1864
}
1840
1865
else
1841
1866
{
1842
- if (HTMLHLIDIGNORE (prs -> words [i ].type ))
1867
+ if (XMLHLIDIGNORE (prs -> words [i ].type ))
1843
1868
prs -> words [i ].replace = 1 ;
1844
1869
}
1845
1870