Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 66a3f90

Browse files
hugopendlebury, keith-hall, robinst
authored
RegexBuilder: Add options for multi_line, ignore_whitespace, dot_matches_new_line (#165)
Also fixes `case_insensitive`. Co-authored-by: Keith Hall <keith-hall@users.noreply.github.com> Co-authored-by: Robin Stocker <robinst@canva.com>
1 parent aa03976 commit 66a3f90

File tree

4 files changed

+406
-51
lines changed

4 files changed

+406
-51
lines changed

‎src/flags.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pubconstFLAG_CASEI:u32 =1;
2+
pubconstFLAG_MULTI:u32 =1 <<1;
3+
pubconstFLAG_DOTNL:u32 =1 <<2;
4+
pubconstFLAG_SWAP_GREED:u32 =1 <<3;
5+
pubconstFLAG_IGNORE_SPACE:u32 =1 <<4;
6+
pubconstFLAG_UNICODE:u32 =1 <<5;

‎src/lib.rs

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ Conditionals - if/then/else:
181181

182182
externcrate alloc;
183183

184-
use alloc::borrow::{Cow,ToOwned};
184+
use alloc::borrow::Cow;
185185
use alloc::boxed::Box;
186186
use alloc::string::{String,ToString};
187187
use alloc::sync::Arc;
@@ -202,12 +202,14 @@ mod analyze;
202202
mod compile;
203203
mod error;
204204
mod expand;
205+
mod flags;
205206
mod parse;
206207
mod replacer;
207208
mod vm;
208209

209210
usecrate::analyze::analyze;
210211
usecrate::compile::compile;
212+
usecrate::flags::*;
211213
usecrate::parse::{ExprTree,NamedGroups,Parser};
212214
usecrate::vm::{Prog,OPTION_SKIPPED_EMPTY_MATCH};
213215

@@ -540,6 +542,28 @@ struct RegexOptions {
540542
delegate_dfa_size_limit:Option<usize>,
541543
}
542544

545+
implRegexOptions{
546+
fnget_flag_value(flag_value:bool,enum_value:u32) ->u32{
547+
if flag_value{
548+
enum_value
549+
}else{
550+
0
551+
}
552+
}
553+
554+
fncompute_flags(&self) ->u32{
555+
let insensitive =Self::get_flag_value(self.syntaxc.get_case_insensitive(),FLAG_CASEI);
556+
let multiline =Self::get_flag_value(self.syntaxc.get_multi_line(),FLAG_MULTI);
557+
let whitespace =
558+
Self::get_flag_value(self.syntaxc.get_ignore_whitespace(),FLAG_IGNORE_SPACE);
559+
let dotnl =Self::get_flag_value(self.syntaxc.get_dot_matches_new_line(),FLAG_DOTNL);
560+
let unicode =Self::get_flag_value(self.syntaxc.get_unicode(),FLAG_UNICODE);
561+
562+
let all_flags = insensitive | multiline | whitespace | dotnl | unicode | unicode;
563+
all_flags
564+
}
565+
}
566+
543567
implDefaultforRegexOptions{
544568
fndefault() ->Self{
545569
RegexOptions{
@@ -569,15 +593,67 @@ impl RegexBuilder {
569593
Regex::new_options(self.0.clone())
570594
}
571595

596+
fnset_config(&mutself,func:implFn(SyntaxConfig) ->SyntaxConfig) ->&mutSelf{
597+
self.0.syntaxc =func(self.0.syntaxc);
598+
self
599+
}
600+
572601
/// Override default case insensitive
573602
/// this is to enable/disable casing via builder instead of a flag within
574603
/// the raw string provided to the regex builder
575604
///
576605
/// Default is false
577606
pubfncase_insensitive(&mutself,yes:bool) ->&mutSelf{
578-
let syntaxc =self.0.syntaxc.to_owned();
579-
self.0.syntaxc = syntaxc.case_insensitive(yes);
580-
self
607+
self.set_config(|x| x.case_insensitive(yes))
608+
}
609+
610+
/// Enable multi-line regex
611+
pubfnmulti_line(&mutself,yes:bool) ->&mutSelf{
612+
self.set_config(|x| x.multi_line(yes))
613+
}
614+
615+
/// Enable or disable ignoring whitespace in the pattern
616+
pubfnignore_whitespace(&mutself,yes:bool) ->&mutSelf{
617+
self.set_config(|x| x.ignore_whitespace(yes))
618+
}
619+
620+
/// Enable or disable the "dot matches any character" flag.
621+
/// When this is enabled, `.` will match any character. When it's disabled, then `.` will match any character
622+
/// except for a new line character.
623+
pubfndot_matches_new_line(&mutself,yes:bool) ->&mutSelf{
624+
self.set_config(|x| x.dot_matches_new_line(yes))
625+
}
626+
627+
/// Enable verbose mode in the regular expression.
628+
///
629+
/// The same as ignore_whitespace
630+
///
631+
/// When enabled, verbose mode permits insigificant whitespace in many
632+
/// places in the regular expression, as well as comments. Comments are
633+
/// started using `#` and continue until the end of the line.
634+
///
635+
/// By default, this is disabled. It may be selectively enabled in the
636+
/// regular expression by using the `x` flag regardless of this setting.
637+
pubfnverbose_mode(&mutself,yes:bool) ->&mutSelf{
638+
self.set_config(|x| x.ignore_whitespace(yes))
639+
}
640+
641+
/// Enable or disable the Unicode flag (`u`) by default.
642+
///
643+
/// By default this is **enabled**. It may alternatively be selectively
644+
/// disabled in the regular expression itself via the `u` flag.
645+
///
646+
/// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
647+
/// default), a regular expression will fail to parse if Unicode mode is
648+
/// disabled and a sub-expression could possibly match invalid UTF-8.
649+
///
650+
/// **WARNING**: Unicode mode can greatly increase the size of the compiled
651+
/// DFA, which can noticeably impact both memory usage and compilation
652+
/// time. This is especially noticeable if your regex contains character
653+
/// classes like `\w` that are impacted by whether Unicode is enabled or
654+
/// not. If Unicode is not necessary, you are encouraged to disable it.
655+
pubfnunicode_mode(&mutself,yes:bool) ->&mutSelf{
656+
self.set_config(|x| x.unicode(yes))
581657
}
582658

583659
/// Limit for how many times backtracking should be attempted for fancy regexes (where
@@ -649,7 +725,7 @@ impl Regex {
649725
}
650726

651727
fnnew_options(options:RegexOptions) ->Result<Regex>{
652-
let raw_tree =Expr::parse_tree(&options.pattern)?;
728+
let raw_tree =Expr::parse_tree_with_flags(&options.pattern, options.compute_flags())?;
653729

654730
// wrapper to search for re at arbitrary start position,
655731
// and to capture the match bounds
@@ -1612,6 +1688,12 @@ impl Expr {
16121688
Parser::parse(re)
16131689
}
16141690

1691+
/// Parse the regex and return an expression (AST)
1692+
/// `flags` is a bitmask built by OR-ing together the `FLAG_*` constants
1693+
pubfnparse_tree_with_flags(re:&str,flags:u32) ->Result<ExprTree>{
1694+
Parser::parse_with_flags(re, flags)
1695+
}
1696+
16151697
/// Convert expression to a regex string in the regex crate's syntax.
16161698
///
16171699
/// # Panics

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp