Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 18bac60

Browse files
committed
Let regexp_replace() make use of REG_NOSUB when feasible.
If the replacement string doesn't contain \1...\9, then we don't need sub-match locations, so we can use the REG_NOSUB optimization here too. There's already a pre-scan of the replacement string to look for backslashes, so extend that to check for digits, and refactor to allow that to happen before we compile the regexp. While at it, try to speed up the pre-scan by using memchr() instead of a handwritten loop. It's likely that this is lost in the noise compared to the regexp processing proper, but maybe not. In any case, this coding is shorter. Also, add some test cases to improve the poor coverage of appendStringInfoRegexpSubstr(). Discussion: https://postgr.es/m/3534632.1628536485@sss.pgh.pa.us
1 parent e126945 commit 18bac60

File tree

5 files changed

+90
-65
lines changed

5 files changed

+90
-65
lines changed

‎src/backend/utils/adt/regexp.c

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -630,11 +630,10 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
630630
text*s=PG_GETARG_TEXT_PP(0);
631631
text*p=PG_GETARG_TEXT_PP(1);
632632
text*r=PG_GETARG_TEXT_PP(2);
633-
regex_t*re;
634-
635-
re=RE_compile_and_cache(p,REG_ADVANCED,PG_GET_COLLATION());
636633

637-
PG_RETURN_TEXT_P(replace_text_regexp(s, (void*)re,r,0,1));
634+
PG_RETURN_TEXT_P(replace_text_regexp(s,p,r,
635+
REG_ADVANCED,PG_GET_COLLATION(),
636+
0,1));
638637
}
639638

640639
/*
@@ -648,7 +647,6 @@ textregexreplace(PG_FUNCTION_ARGS)
648647
text*p=PG_GETARG_TEXT_PP(1);
649648
text*r=PG_GETARG_TEXT_PP(2);
650649
text*opt=PG_GETARG_TEXT_PP(3);
651-
regex_t*re;
652650
pg_re_flagsflags;
653651

654652
/*
@@ -672,10 +670,9 @@ textregexreplace(PG_FUNCTION_ARGS)
672670

673671
parse_re_flags(&flags,opt);
674672

675-
re=RE_compile_and_cache(p,flags.cflags,PG_GET_COLLATION());
676-
677-
PG_RETURN_TEXT_P(replace_text_regexp(s, (void*)re,r,0,
678-
flags.glob ?0 :1));
673+
PG_RETURN_TEXT_P(replace_text_regexp(s,p,r,
674+
flags.cflags,PG_GET_COLLATION(),
675+
0,flags.glob ?0 :1));
679676
}
680677

681678
/*
@@ -694,7 +691,6 @@ textregexreplace_extended(PG_FUNCTION_ARGS)
694691
intn=1;
695692
text*flags=PG_GETARG_TEXT_PP_IF_EXISTS(5);
696693
pg_re_flagsre_flags;
697-
regex_t*re;
698694

699695
/* Collect optional parameters */
700696
if (PG_NARGS()>3)
@@ -723,11 +719,10 @@ textregexreplace_extended(PG_FUNCTION_ARGS)
723719
if (PG_NARGS() <=4)
724720
n=re_flags.glob ?0 :1;
725721

726-
/* Compile the regular expression */
727-
re=RE_compile_and_cache(p,re_flags.cflags,PG_GET_COLLATION());
728-
729722
/* Do the replacement(s) */
730-
PG_RETURN_TEXT_P(replace_text_regexp(s, (void*)re,r,start-1,n));
723+
PG_RETURN_TEXT_P(replace_text_regexp(s,p,r,
724+
re_flags.cflags,PG_GET_COLLATION(),
725+
start-1,n));
731726
}
732727

733728
/* This is separate to keep the opr_sanity regression test from complaining */

‎src/backend/utils/adt/varlena.c

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4359,34 +4359,36 @@ replace_text(PG_FUNCTION_ARGS)
43594359
}
43604360

43614361
/*
4362-
*check_replace_text_has_escape_char
4362+
*check_replace_text_has_escape
43634363
*
4364-
* check whether replace_text contains escape char.
4364+
* Returns 0 if text contains no backslashes that need processing.
4365+
* Returns 1 if text contains backslashes, but not regexp submatch specifiers.
4366+
* Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
43654367
*/
4366-
staticbool
4367-
check_replace_text_has_escape_char(consttext*replace_text)
4368+
staticint
4369+
check_replace_text_has_escape(consttext*replace_text)
43684370
{
4371+
intresult=0;
43694372
constchar*p=VARDATA_ANY(replace_text);
43704373
constchar*p_end=p+VARSIZE_ANY_EXHDR(replace_text);
43714374

4372-
if (pg_database_encoding_max_length()==1)
4373-
{
4374-
for (;p<p_end;p++)
4375-
{
4376-
if (*p=='\\')
4377-
return true;
4378-
}
4379-
}
4380-
else
4375+
while (p<p_end)
43814376
{
4382-
for (;p<p_end;p+=pg_mblen(p))
4377+
/* Find next escape char, if any. */
4378+
p=memchr(p,'\\',p_end-p);
4379+
if (p==NULL)
4380+
break;
4381+
p++;
4382+
/* Note: a backslash at the end doesn't require extra processing. */
4383+
if (p<p_end)
43834384
{
4384-
if (*p=='\\')
4385-
return true;
4385+
if (*p >='1'&&*p <='9')
4386+
return2;/* Found a submatch specifier, so done */
4387+
result=1;/* Found some other sequence, keep looking */
4388+
p++;
43864389
}
43874390
}
4388-
4389-
return false;
4391+
returnresult;
43904392
}
43914393

43924394
/*
@@ -4403,25 +4405,17 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
44034405
{
44044406
constchar*p=VARDATA_ANY(replace_text);
44054407
constchar*p_end=p+VARSIZE_ANY_EXHDR(replace_text);
4406-
inteml=pg_database_encoding_max_length();
44074408

4408-
for (;;)
4409+
while (p<p_end)
44094410
{
44104411
constchar*chunk_start=p;
44114412
intso;
44124413
inteo;
44134414

4414-
/* Find next escape char. */
4415-
if (eml==1)
4416-
{
4417-
for (;p<p_end&&*p!='\\';p++)
4418-
/* nothing */ ;
4419-
}
4420-
else
4421-
{
4422-
for (;p<p_end&&*p!='\\';p+=pg_mblen(p))
4423-
/* nothing */ ;
4424-
}
4415+
/* Find next escape char, if any. */
4416+
p=memchr(p,'\\',p_end-p);
4417+
if (p==NULL)
4418+
p=p_end;
44254419

44264420
/* Copy the text we just scanned over, if any. */
44274421
if (p>chunk_start)
@@ -4473,7 +4467,7 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
44734467
continue;
44744468
}
44754469

4476-
if (so!=-1&&eo!=-1)
4470+
if (so>=0&&eo>=0)
44774471
{
44784472
/*
44794473
* Copy the text that is back reference of regexp. Note so and eo
@@ -4491,45 +4485,57 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
44914485
}
44924486
}
44934487

4494-
#defineREGEXP_REPLACE_BACKREF_CNT10
4495-
44964488
/*
44974489
* replace_text_regexp
44984490
*
4499-
* replace substring(s) in src_text that match regexp with replace_text.
4491+
* replace substring(s) in src_text that match pattern with replace_text.
4492+
* The replace_text can contain backslash markers to substitute
4493+
* (parts of) the matched text.
45004494
*
4495+
* cflags: regexp compile flags.
4496+
* collation: collation to use.
45014497
* search_start: the character (not byte) offset in src_text at which to
45024498
* begin searching.
45034499
* n: if 0, replace all matches; if > 0, replace only the N'th match.
4504-
*
4505-
* Note: to avoid having to include regex.h in builtins.h, we declare
4506-
* the regexp argument as void *, but really it's regex_t *.
45074500
*/
45084501
text*
4509-
replace_text_regexp(text*src_text,void*regexp,
4502+
replace_text_regexp(text*src_text,text*pattern_text,
45104503
text*replace_text,
4504+
intcflags,Oidcollation,
45114505
intsearch_start,intn)
45124506
{
45134507
text*ret_text;
4514-
regex_t*re= (regex_t*)regexp;
4508+
regex_t*re;
45154509
intsrc_text_len=VARSIZE_ANY_EXHDR(src_text);
45164510
intnmatches=0;
45174511
StringInfoDatabuf;
4518-
regmatch_tpmatch[REGEXP_REPLACE_BACKREF_CNT];
4512+
regmatch_tpmatch[10];/* main match, plus \1 to \9 */
4513+
intnmatch=lengthof(pmatch);
45194514
pg_wchar*data;
45204515
size_tdata_len;
45214516
intdata_pos;
45224517
char*start_ptr;
4523-
boolhave_escape;
4518+
intescape_status;
45244519

45254520
initStringInfo(&buf);
45264521

45274522
/* Convert data string to wide characters. */
45284523
data= (pg_wchar*)palloc((src_text_len+1)*sizeof(pg_wchar));
45294524
data_len=pg_mb2wchar_with_len(VARDATA_ANY(src_text),data,src_text_len);
45304525

4531-
/* Check whether replace_text has escape char. */
4532-
have_escape=check_replace_text_has_escape_char(replace_text);
4526+
/* Check whether replace_text has escapes, especially regexp submatches. */
4527+
escape_status=check_replace_text_has_escape(replace_text);
4528+
4529+
/* If no regexp submatches, we can use REG_NOSUB. */
4530+
if (escape_status<2)
4531+
{
4532+
cflags |=REG_NOSUB;
4533+
/* Also tell pg_regexec we only want the whole-match location. */
4534+
nmatch=1;
4535+
}
4536+
4537+
/* Prepare the regexp. */
4538+
re=RE_compile_and_cache(pattern_text,cflags,collation);
45334539

45344540
/* start_ptr points to the data_pos'th character of src_text */
45354541
start_ptr= (char*)VARDATA_ANY(src_text);
@@ -4546,7 +4552,7 @@ replace_text_regexp(text *src_text, void *regexp,
45464552
data_len,
45474553
search_start,
45484554
NULL,/* no details */
4549-
REGEXP_REPLACE_BACKREF_CNT,
4555+
nmatch,
45504556
pmatch,
45514557
0);
45524558

@@ -4602,10 +4608,9 @@ replace_text_regexp(text *src_text, void *regexp,
46024608
}
46034609

46044610
/*
4605-
* Copy the replace_text. Process back references when the
4606-
* replace_text has escape characters.
4611+
* Copy the replace_text, processing escapes if any are present.
46074612
*/
4608-
if (have_escape)
4613+
if (escape_status>0)
46094614
appendStringInfoRegexpSubstr(&buf,replace_text,pmatch,
46104615
start_ptr,data_pos);
46114616
else

‎src/include/utils/varlena.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ extern bool SplitDirectoriesString(char *rawstring, char separator,
3333
List**namelist);
3434
externboolSplitGUCList(char*rawstring,charseparator,
3535
List**namelist);
36-
externtext*replace_text_regexp(text*src_text,void*regexp,
36+
externtext*replace_text_regexp(text*src_text,text*pattern_text,
3737
text*replace_text,
38+
intcflags,Oidcollation,
3839
intsearch_start,intn);
3940

4041
#endif

‎src/test/regress/expected/strings.out

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,13 +571,32 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
571571
SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
572572
ERROR: invalid escape string
573573
HINT: Escape string must be empty or one character.
574-
-- Testback reference in regexp_replace
574+
-- Testbackslash escapes in regexp_replace's replacement string
575575
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
576576
regexp_replace
577577
----------------
578578
(111) 222-3333
579579
(1 row)
580580

581+
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y', 'g');
582+
regexp_replace
583+
-------------------
584+
fXooYbaXrrYbaXzzY
585+
(1 row)
586+
587+
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\\\Y', 'g');
588+
regexp_replace
589+
----------------
590+
fX\YbaX\YbaX\Y
591+
(1 row)
592+
593+
-- not an error, though perhaps it should be:
594+
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\Y\\1Z\\');
595+
regexp_replace
596+
-----------------
597+
fX\YoZ\barrbazz
598+
(1 row)
599+
581600
SELECT regexp_replace('AAA BBB CCC ', E'\\s+', ' ', 'g');
582601
regexp_replace
583602
----------------

‎src/test/regress/sql/strings.sql

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,13 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true;
187187
SELECT'abcdefg' SIMILAR TO'_bcd%' ESCAPENULLASnull;
188188
SELECT'abcdefg' SIMILAR TO'_bcd#%' ESCAPE'##'AS error;
189189

190-
-- Testback reference in regexp_replace
190+
-- Testbackslash escapes in regexp_replace's replacement string
191191
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1)\\2-\\3');
192+
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y','g');
193+
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\\\Y','g');
194+
-- not an error, though perhaps it should be:
195+
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\Y\\1Z\\');
196+
192197
SELECT regexp_replace('AAA BBB CCC', E'\\s+','','g');
193198
SELECT regexp_replace('AAA','^|$','Z','g');
194199
SELECT regexp_replace('AAA aaa','A+','Z','gi');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp