@@ -181,7 +181,7 @@ Conditionals - if/then/else:
181
181
182
182
extern crate alloc;
183
183
184
- use alloc:: borrow:: { Cow , ToOwned } ;
184
+ use alloc:: borrow:: Cow ;
185
185
use alloc:: boxed:: Box ;
186
186
use alloc:: string:: { String , ToString } ;
187
187
use alloc:: sync:: Arc ;
@@ -202,12 +202,14 @@ mod analyze;
202
202
mod compile;
203
203
mod error;
204
204
mod expand;
205
+ mod flags;
205
206
mod parse;
206
207
mod replacer;
207
208
mod vm;
208
209
209
210
use crate :: analyze:: analyze;
210
211
use crate :: compile:: compile;
212
+ use crate :: flags:: * ;
211
213
use crate :: parse:: { ExprTree , NamedGroups , Parser } ;
212
214
use crate :: vm:: { Prog , OPTION_SKIPPED_EMPTY_MATCH } ;
213
215
@@ -540,6 +542,28 @@ struct RegexOptions {
540
542
delegate_dfa_size_limit : Option < usize > ,
541
543
}
542
544
545
+ impl RegexOptions {
546
+ fn get_flag_value ( flag_value : bool , enum_value : u32 ) ->u32 {
547
+ if flag_value{
548
+ enum_value
549
+ } else {
550
+ 0
551
+ }
552
+ }
553
+
554
+ fn compute_flags ( & self ) ->u32 {
555
+ let insensitive =Self :: get_flag_value ( self . syntaxc . get_case_insensitive ( ) , FLAG_CASEI ) ;
556
+ let multiline =Self :: get_flag_value ( self . syntaxc . get_multi_line ( ) , FLAG_MULTI ) ;
557
+ let whitespace =
558
+ Self :: get_flag_value ( self . syntaxc . get_ignore_whitespace ( ) , FLAG_IGNORE_SPACE ) ;
559
+ let dotnl =Self :: get_flag_value ( self . syntaxc . get_dot_matches_new_line ( ) , FLAG_DOTNL ) ;
560
+ let unicode =Self :: get_flag_value ( self . syntaxc . get_unicode ( ) , FLAG_UNICODE ) ;
561
+
562
+ let all_flags = insensitive | multiline | whitespace | dotnl | unicode | unicode;
563
+ all_flags
564
+ }
565
+ }
566
+
543
567
impl Default for RegexOptions {
544
568
fn default ( ) ->Self {
545
569
RegexOptions {
@@ -569,15 +593,67 @@ impl RegexBuilder {
569
593
Regex :: new_options ( self . 0 . clone ( ) )
570
594
}
571
595
596
+ fn set_config ( & mut self , func : impl Fn ( SyntaxConfig ) ->SyntaxConfig ) ->& mut Self {
597
+ self . 0 . syntaxc =func ( self . 0 . syntaxc ) ;
598
+ self
599
+ }
600
+
572
601
/// Override default case insensitive
573
602
/// this is to enable/disable casing via builder instead of a flag within
574
603
/// the raw string provided to the regex builder
575
604
///
576
605
/// Default is false
577
606
pub fn case_insensitive ( & mut self , yes : bool ) ->& mut Self {
578
- let syntaxc =self . 0 . syntaxc . to_owned ( ) ;
579
- self . 0 . syntaxc = syntaxc. case_insensitive ( yes) ;
580
- self
607
+ self . set_config ( |x| x. case_insensitive ( yes) )
608
+ }
609
+
610
+ /// Enable or disable the multi-line flag for the regex.
611
+ pub fn multi_line ( & mut self , yes : bool ) ->& mut Self {
612
+ self . set_config ( |x| x. multi_line ( yes) )
613
+ }
614
+
615
+ /// Enable or disable the ignore-whitespace flag (same setting as `verbose_mode`).
616
+ pub fn ignore_whitespace ( & mut self , yes : bool ) ->& mut Self {
617
+ self . set_config ( |x| x. ignore_whitespace ( yes) )
618
+ }
619
+
620
+ /// Enable or disable the "dot matches any character" flag.
621
+ /// When this is enabled, `.` will match any character. When it's disabled, then `.` will match any character
622
+ /// except for a new line character.
623
+ pub fn dot_matches_new_line ( & mut self , yes : bool ) ->& mut Self {
624
+ self . set_config ( |x| x. dot_matches_new_line ( yes) )
625
+ }
626
+
627
+ /// Enable verbose mode in the regular expression.
628
+ ///
629
+ /// The same as ignore_whitespace
630
+ ///
631
+ /// When enabled, verbose mode permits insignificant whitespace in many
632
+ /// places in the regular expression, as well as comments. Comments are
633
+ /// started using `#` and continue until the end of the line.
634
+ ///
635
+ /// By default, this is disabled. It may be selectively enabled in the
636
+ /// regular expression by using the `x` flag regardless of this setting.
637
+ pub fn verbose_mode ( & mut self , yes : bool ) ->& mut Self {
638
+ self . set_config ( |x| x. ignore_whitespace ( yes) )
639
+ }
640
+
641
+ /// Enable or disable the Unicode flag (`u`) by default.
642
+ ///
643
+ /// By default this is **enabled**. It may alternatively be selectively
644
+ /// disabled in the regular expression itself via the `u` flag.
645
+ ///
646
+ /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
647
+ /// default), a regular expression will fail to parse if Unicode mode is
648
+ /// disabled and a sub-expression could possibly match invalid UTF-8.
649
+ ///
650
+ /// **WARNING**: Unicode mode can greatly increase the size of the compiled
651
+ /// DFA, which can noticeably impact both memory usage and compilation
652
+ /// time. This is especially noticeable if your regex contains character
653
+ /// classes like `\w` that are impacted by whether Unicode is enabled or
654
+ /// not. If Unicode is not necessary, you are encouraged to disable it.
655
+ pub fn unicode_mode ( & mut self , yes : bool ) ->& mut Self {
656
+ self . set_config ( |x| x. unicode ( yes) )
581
657
}
582
658
583
659
/// Limit for how many times backtracking should be attempted for fancy regexes (where
@@ -649,7 +725,7 @@ impl Regex {
649
725
}
650
726
651
727
fn new_options ( options : RegexOptions ) ->Result < Regex > {
652
- let raw_tree =Expr :: parse_tree ( & options. pattern ) ?;
728
+ let raw_tree =Expr :: parse_tree_with_flags ( & options. pattern , options . compute_flags ( ) ) ?;
653
729
654
730
// wrapper to search for re at arbitrary start position,
655
731
// and to capture the match bounds
@@ -1612,6 +1688,12 @@ impl Expr {
1612
1688
Parser :: parse ( re)
1613
1689
}
1614
1690
1691
+ /// Parse the regex and return an expression (AST)
1692
+ /// `flags` is a bitmask: the parser flag values (`FLAG_*`) combined with `|`.
1693
+ pub fn parse_tree_with_flags ( re : & str , flags : u32 ) ->Result < ExprTree > {
1694
+ Parser :: parse_with_flags ( re, flags)
1695
+ }
1696
+
1615
1697
/// Convert expression to a regex string in the regex crate's syntax.
1616
1698
///
1617
1699
/// # Panics