Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2391e5e

Browse files
<regex>: Improve search performance for regexes with initial+ quantifiers (#5509)
Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
1 parent6de3f1c commit2391e5e

File tree

5 files changed

+104
-6
lines changed

5 files changed

+104
-6
lines changed

‎benchmarks/CMakeLists.txt‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ add_benchmark(nth_element src/nth_element.cpp)
116116
add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
117117
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
118118
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
119+
add_benchmark(regex_search src/regex_search.cpp)
119120
add_benchmark(remove src/remove.cpp)
120121
add_benchmark(replace src/replace.cpp)
121122
add_benchmark(reverse src/reverse.cpp)

‎benchmarks/src/regex_search.cpp‎

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#include<benchmark/benchmark.h>
5+
#include<regex>
6+
#include<string>
7+
8+
#include"lorem.hpp"
9+
10+
usingnamespacestd;
11+
12+
voidbm_lorem_search(benchmark::State& state,constchar* pattern) {
13+
string repeated_lorem{lorem_ipsum};
14+
for (longlong i =0; i < state.range(); ++i) {
15+
repeated_lorem += repeated_lorem;
16+
}
17+
regex re{pattern};
18+
19+
for (auto _ : state) {
20+
benchmark::DoNotOptimize(repeated_lorem);
21+
constchar* pos = repeated_lorem.data();
22+
constchar* end = repeated_lorem.data() + repeated_lorem.size();
23+
cmatch match;
24+
for (;regex_search(pos, end, match, re); ++pos) {
25+
benchmark::DoNotOptimize(match);
26+
pos = match[0].second;
27+
if (pos == end) {
28+
break;
29+
}
30+
}
31+
}
32+
}
33+
34+
BENCHMARK_CAPTURE(bm_lorem_search,"bibe","bibe")->Arg(2)->Arg(3)->Arg(4);
35+
BENCHMARK_CAPTURE(bm_lorem_search,"(bibe)+","(bibe)+")->Arg(2)->Arg(3)->Arg(4);
36+
BENCHMARK_CAPTURE(bm_lorem_search,"(?:bibe)+","(?:bibe)+")->Arg(2)->Arg(3)->Arg(4);
37+
38+
BENCHMARK_MAIN();

‎stl/inc/regex‎

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4128,6 +4128,15 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
41284128
break;
41294129
}
41304130

4131+
case _N_rep:
4132+
{
4133+
_Node_rep* _Node = static_cast<_Node_rep*>(_Nx);
4134+
if (_Node->_Min == 0) {
4135+
return _First_arg;
4136+
}
4137+
break;
4138+
}
4139+
41314140
case _N_begin:
41324141
break;
41334142

@@ -4139,7 +4148,6 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
41394148
case _N_neg_assert:
41404149
case _N_back:
41414150
case _N_endif:
4142-
case _N_rep:
41434151
case _N_end_rep:
41444152
default:
41454153
return _First_arg;

‎tests/std/include/test_regex_support.hpp‎

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,9 @@ class test_regex {
282282
submatches_success =false;
283283
break;
284284
}
285-
}elseif (!actual_capture.matched || actual_capture.first != (mr[0].first + expected_capture.first)
286-
|| actual_capture.second != (mr[0].first + expected_capture.second)) {
285+
}elseif (!actual_capture.matched
286+
|| actual_capture.first != (subject.begin() + expected_capture.first)
287+
|| actual_capture.second != (subject.begin() + expected_capture.second)) {
287288
submatches_success =false;
288289
break;
289290
}
@@ -297,7 +298,8 @@ class test_regex {
297298
for (constauto& expected_capture : capture_groups) {
298299
std::string capture ="(unmatched)";
299300
if (expected_capture.first != -1) {
300-
capture.assign(mr[0].first + expected_capture.first, mr[0].first + expected_capture.second);
301+
capture.assign(
302+
subject.begin() + expected_capture.first, subject.begin() + expected_capture.second);
301303
}
302304
printf(R"(%s"%s" [%td %td])", initial ?"" :",", capture.c_str(), expected_capture.first,
303305
expected_capture.second);
@@ -313,8 +315,8 @@ class test_regex {
313315
std::ptrdiff_t last = -1;
314316
if (actual_capture.matched) {
315317
capture = actual_capture.str();
316-
first = actual_capture.first -mr[0].first;
317-
last = actual_capture.second -mr[0].first;
318+
first = actual_capture.first -subject.begin();
319+
last = actual_capture.second -subject.begin();
318320
}
319321
printf(R"(%s"%s" [%td %td])", initial ?"" :",", capture.c_str(), first, last);
320322
initial =false;

‎tests/std/tests/VSO_0000000_regex_use/test.cpp‎

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,6 +1889,54 @@ void test_gh_5490() {
18891889
}
18901890
}
18911891

1892+
voidtest_gh_5509() {
1893+
// GH-5509 extended the matcher's skip optimization
1894+
// to regexes starting with a loop with at least one repetition,
1895+
// speeding up searches for such regexes
1896+
1897+
{
1898+
test_regexchar_plus_regex(&g_regexTester,"(a+)");
1899+
char_plus_regex.should_search_match_capture_groups("blwerofaaweraf","aa", match_default, {{7,9}});
1900+
char_plus_regex.should_search_fail("blwerofwerf");
1901+
}
1902+
1903+
{
1904+
test_regexcharclass_plus_regex(&g_regexTester,"([fa]+)");
1905+
charclass_plus_regex.should_search_match_capture_groups("blwerofaaweraf","faa", match_default, {{6,9}});
1906+
charclass_plus_regex.should_search_fail("blwerower");
1907+
}
1908+
1909+
{
1910+
test_regexstring_plus_regex(&g_regexTester,"((?:aw)+)");
1911+
string_plus_regex.should_search_match_capture_groups("blwerofaawaweraf","awaw", match_default, {{8,12}});
1912+
string_plus_regex.should_search_fail("blwerofaerwaf");
1913+
}
1914+
1915+
{
1916+
test_regexanchored_string_plus_regex(&g_regexTester,"((?:^aw)+)");
1917+
anchored_string_plus_regex.should_search_match_capture_groups(
1918+
"blwerofa\nawaweraf","aw", match_default, {{9,11}});
1919+
anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
1920+
}
1921+
1922+
{
1923+
test_regexanchored_string_plus_regex(&g_regexTester,"((?:$\naw)+)");
1924+
anchored_string_plus_regex.should_search_match_capture_groups(
1925+
"blwerofa\nawaweraf","\naw", match_default, {{8,11}});
1926+
anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
1927+
}
1928+
1929+
{
1930+
test_regexstring_star_string_regex(&g_regexTester,"((?:aw)*fa)");
1931+
string_star_string_regex.should_search_match_capture_groups(
1932+
"blwerofaawawfaeraf","fa", match_default, {{6,8}});
1933+
string_star_string_regex.should_search_match_capture_groups(
1934+
"blweroawawfaeraf","awawfa", match_default, {{6,12}});
1935+
string_star_string_regex.should_search_match("blwerofaerwaf","fa");
1936+
string_star_string_regex.should_search_fail("blweroerwaf");
1937+
}
1938+
}
1939+
18921940
intmain() {
18931941
test_dev10_449367_case_insensitivity_should_work();
18941942
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -1936,6 +1984,7 @@ int main() {
19361984
test_gh_5374();
19371985
test_gh_5377();
19381986
test_gh_5490();
1987+
test_gh_5509();
19391988

19401989
return g_regexTester.result();
19411990
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp