88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.56 2004/12/31 22:01:22 pgsql Exp $
11+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.57 2005/07/10 04:54:30 momjian Exp $
1212 *
1313 *Alistair Crooks added the code for the regex caching
1414 *agc - cached the regular expressions used - there's a good chance
@@ -81,38 +81,27 @@ static cached_re_str re_array[MAX_CACHED_RES];/* cached re's */
8181
8282
8383/*
84- *RE_compile_and_execute - compile and execute a RE, caching if possible
84+ *RE_compile_and_cache - compile a RE, caching if possible
8585 *
86- * ReturnsTRUE on match, FALSE on no match
86+ * Returns regex_t
8787 *
88- *text_re --- the pattern, expressed as an *untoasted* TEXT object
89- *dat --- the data to match against (need not be null-terminated)
90- *dat_len --- the length of the data string
91- *cflags --- compile options for the pattern
92- *nmatch, pmatch--- optional return area for match details
88+ * text_re --- the pattern, expressed as an *untoasted* TEXT object
89+ * cflags --- compile options for the pattern
9390 *
94- *Both pattern and data are given in the database encoding. We internally
95- *convert to array of pg_wchar which is what Spencer's regex package wants.
91+ *Pattern is given in the database encoding. We internally convert to
92+ * array of pg_wchar which is what Spencer's regex package wants.
9693 */
97- static bool
98- RE_compile_and_execute (text * text_re ,unsignedchar * dat ,int dat_len ,
99- int cflags ,int nmatch ,regmatch_t * pmatch )
94+ static regex_t
95+ RE_compile_and_cache (text * text_re ,int cflags )
10096{
10197int text_re_len = VARSIZE (text_re );
102- pg_wchar * data ;
103- size_t data_len ;
10498pg_wchar * pattern ;
10599size_t pattern_len ;
106100int i ;
107101int regcomp_result ;
108- int regexec_result ;
109102cached_re_str re_temp ;
110103char errMsg [100 ];
111104
112- /* Convert data string to wide characters */
113- data = (pg_wchar * )palloc ((dat_len + 1 )* sizeof (pg_wchar ));
114- data_len = pg_mb2wchar_with_len (dat ,data ,dat_len );
115-
116105/*
117106 * Look for a match among previously compiled REs.Since the data
118107 * structure is self-organizing with most-used entries at the front,
@@ -134,28 +123,7 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
134123re_array [0 ]= re_temp ;
135124}
136125
137- /* Perform RE match and return result */
138- regexec_result = pg_regexec (& re_array [0 ].cre_re ,
139- data ,
140- data_len ,
141- NULL ,/* no details */
142- nmatch ,
143- pmatch ,
144- 0 );
145-
146- pfree (data );
147-
148- if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH )
149- {
150- /* re failed??? */
151- pg_regerror (regexec_result ,& re_array [0 ].cre_re ,
152- errMsg ,sizeof (errMsg ));
153- ereport (ERROR ,
154- (errcode (ERRCODE_INVALID_REGULAR_EXPRESSION ),
155- errmsg ("regular expression failed: %s" ,errMsg )));
156- }
157-
158- return (regexec_result == REG_OKAY );
126+ return re_array [0 ].cre_re ;
159127}
160128}
161129
@@ -220,10 +188,45 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
220188re_array [0 ]= re_temp ;
221189num_res ++ ;
222190
191+ return re_array [0 ].cre_re ;
192+ }
193+
194+ /*
195+ * RE_compile_and_execute - compile and execute a RE
196+ *
197+ * Returns TRUE on match, FALSE on no match
198+ *
199+ * text_re --- the pattern, expressed as an *untoasted* TEXT object
200+ * dat --- the data to match against (need not be null-terminated)
201+ * dat_len --- the length of the data string
202+ * cflags --- compile options for the pattern
203+ * nmatch, pmatch --- optional return area for match details
204+ *
205+ * Both pattern and data are given in the database encoding. We internally
206+ * convert to array of pg_wchar which is what Spencer's regex package wants.
207+ */
208+ static bool
209+ RE_compile_and_execute (text * text_re ,unsignedchar * dat ,int dat_len ,
210+ int cflags ,int nmatch ,regmatch_t * pmatch )
211+ {
212+ pg_wchar * data ;
213+ size_t data_len ;
214+ int regexec_result ;
215+ regex_t re ;
216+ char errMsg [100 ];
217+
218+ /* Convert data string to wide characters */
219+ data = (pg_wchar * )palloc ((dat_len + 1 )* sizeof (pg_wchar ));
220+ data_len = pg_mb2wchar_with_len (dat ,data ,dat_len );
221+
222+ /* Compile RE */
223+ re = RE_compile_and_cache (text_re ,cflags );
224+
223225/* Perform RE match and return result */
224226regexec_result = pg_regexec (& re_array [0 ].cre_re ,
225227data ,
226228data_len ,
229+ 0 ,
227230NULL ,/* no details */
228231nmatch ,
229232pmatch ,
@@ -428,15 +431,89 @@ textregexsubstr(PG_FUNCTION_ARGS)
428431eo = pmatch [0 ].rm_eo ;
429432}
430433
431- return ( DirectFunctionCall3 (text_substr ,
434+ return DirectFunctionCall3 (text_substr ,
432435PointerGetDatum (s ),
433436Int32GetDatum (so + 1 ),
434- Int32GetDatum (eo - so ))) ;
437+ Int32GetDatum (eo - so ));
435438}
436439
437440PG_RETURN_NULL ();
438441}
439442
443+ /*
444+ * textregexreplace_noopt()
445+ * Return the result of replacing a regular-expression match in a string.
446+ * This is the variant of textregexreplace that takes no option
447+ * argument: matching is case sensitive and only the first
448+ * instance is replaced.
449+ */
450+ Datum
451+ textregexreplace_noopt (PG_FUNCTION_ARGS )
452+ {
453+ text * s = PG_GETARG_TEXT_P (0 ); /* arg 0: presumably the text to search (passed first to replace_text_regexp) */
454+ text * p = PG_GETARG_TEXT_P (1 ); /* arg 1: the regular expression pattern */
455+ text * r = PG_GETARG_TEXT_P (2 ); /* arg 2: presumably the replacement text */
456+ regex_t re ; /* local by-value copy of the compiled pattern */
457+
458+ re = RE_compile_and_cache (p ,regex_flavor ); /* plain regex_flavor, no REG_ICASE: case sensitive */
459+
460+ return DirectFunctionCall4 (replace_text_regexp ,
461+ PointerGetDatum (s ),
462+ PointerGetDatum (& re ), /* address of the local copy, not of the cache entry */
463+ PointerGetDatum (r ),
464+ BoolGetDatum (false)); /* false in the "global" slot: first instance only */
465+ }
466+
467+ /*
468+ * textregexreplace()
469+ * Regexp replace taking an option string: 'i' = ignore case, 'g' = global.
470+ */
471+ Datum
472+ textregexreplace (PG_FUNCTION_ARGS )
473+ {
474+ text * s = PG_GETARG_TEXT_P (0 ); /* arg 0: presumably the text to search (passed first to replace_text_regexp) */
475+ text * p = PG_GETARG_TEXT_P (1 ); /* arg 1: the regular expression pattern */
476+ text * r = PG_GETARG_TEXT_P (2 ); /* arg 2: presumably the replacement text */
477+ text * opt = PG_GETARG_TEXT_P (3 ); /* arg 3: option letters, parsed below */
478+ char * opt_p = VARDATA (opt ); /* option bytes; scanned by length, not by terminator */
479+ int opt_len = (VARSIZE (opt )- VARHDRSZ ); /* payload length: total size minus varlena header */
480+ int i ;
481+ bool global = false; /* set by 'g'; forwarded to replace_text_regexp */
482+ bool ignorecase = false; /* set by 'i'; adds REG_ICASE at compile time */
483+ regex_t re ; /* local by-value copy of the compiled pattern */
484+
485+ /* parse options: each byte must be 'i' or 'g'; repeats are accepted */
486+ for (i = 0 ;i < opt_len ;i ++ )
487+ {
488+ switch (opt_p [i ])
489+ {
490+ case 'i' :
491+ ignorecase = true;
492+ break ;
493+ case 'g' :
494+ global = true;
495+ break ;
496+ default : /* any other byte is rejected with an error naming it */
497+ ereport (ERROR ,
498+ (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
499+ errmsg ("invalid option of regexp_replace: %c" ,
500+ opt_p [i ])));
501+ break ; /* presumably not reached: ereport(ERROR) is expected not to return */
502+ }
503+ }
504+
505+ if (ignorecase ) /* the two branches differ only in the REG_ICASE compile flag */
506+ re = RE_compile_and_cache (p ,regex_flavor |REG_ICASE );
507+ else
508+ re = RE_compile_and_cache (p ,regex_flavor );
509+
510+ return DirectFunctionCall4 (replace_text_regexp ,
511+ PointerGetDatum (s ),
512+ PointerGetDatum (& re ), /* address of the local copy, not of the cache entry */
513+ PointerGetDatum (r ),
514+ BoolGetDatum (global )); /* false unless 'g' was given */
515+ }
516+
440517/* similar_escape()
441518 * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
442519 * our regexp engine.