Regular Expressions
Verify and extract login from an email address
Validates that an email address is formatted correctly, and extracts everything before the @ symbol.
use lazy_static::lazy_static;use regex::Regex;fn extract_login(input: &str) -> Option<&str> { lazy_static! { static ref RE: Regex = Regex::new(r"(?x) ^(?P<login>[^@\s]+)@ ([[:word:]]+\.)* [[:word:]]+$ ").unwrap(); } RE.captures(input).and_then(|cap| { cap.name("login").map(|login| login.as_str()) })}fn main() { assert_eq!(extract_login(r"I❤email@example.com"), Some(r"I❤email")); assert_eq!( extract_login(r"sdf+sdsfsd.as.sdsd@jhkk.d.rl"), Some(r"sdf+sdsfsd.as.sdsd") ); assert_eq!(extract_login(r"More@Than@One@at.com"), None); assert_eq!(extract_login(r"Not an email@email"), None);}
Extract a list of unique #Hashtags from a text
Extracts a deduplicated set of hashtags from text. The result is a `HashSet`, so the hashtags come back in no particular order.
The hashtag regex given here only catches Latin hashtags that start with a letter. The complete Twitter hashtag regex is much more complicated.
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashSet;

/// Collects every distinct hashtag found in `text`.
///
/// A hashtag here is a `#` followed by an ASCII letter and then any number of
/// ASCII letters, digits or underscores. The returned slices borrow from
/// `text`.
fn extract_hashtags(text: &str) -> HashSet<&str> {
    lazy_static! {
        // Compiled once on first use and shared by every later call.
        static ref HASHTAG_REGEX: Regex = Regex::new(r"\#[a-zA-Z][0-9a-zA-Z_]*").unwrap();
    }

    let mut unique_tags = HashSet::new();
    for hit in HASHTAG_REGEX.find_iter(text) {
        // Inserting into a set drops repeated hashtags.
        unique_tags.insert(hit.as_str());
    }
    unique_tags
}

fn main() {
    let tweet = "Hey #world, I just got my new #dog, say hello to Till. #dog #forever #2 #_ ";
    let tags = extract_hashtags(tweet);

    let expected = ["#dog", "#forever", "#world"];
    assert!(expected.iter().all(|tag| tags.contains(tag)));
    // "#dog" appears twice; "#2" and "#_" do not start with a letter.
    assert_eq!(tags.len(), 3);
}
Extract phone numbers from text
Processes a string of text using `Regex::captures_iter` to capture multiple phone numbers. The example here is for US convention phone numbers.
use anyhow::Result;
use regex::Regex;
use std::fmt;

/// A US-convention phone number whose parts borrow from the searched text.
struct PhoneNumber<'a> {
    /// Three-digit area code, without surrounding parentheses.
    area: &'a str,
    /// Three-digit exchange code.
    exchange: &'a str,
    /// Four-digit subscriber number.
    subscriber: &'a str,
}

impl<'a> fmt::Display for PhoneNumber<'a> {
    /// Renders the number as `1 (AAA) EEE-SSSS`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "1 ({}) {}-{}", self.area, self.exchange, self.subscriber)
    }
}

/// Finds every phone number in a sample text and checks their normalized form.
///
/// Returns an error only if the regular expression fails to compile.
fn main() -> Result<()> {
    let phone_text = " +1 505 881 9292 (v) +1 505 778 2212 (c) +1 505 881 9297 (f) (202) 991 9534 Alex 5553920011 1 (800) 233-2010 1.299.339.1020";

    // In `(?x)` mode a `#` starts a comment that runs to the end of the line,
    // so each fragment must sit on its own line. On a single line, everything
    // after the first `#` would be swallowed and no capture group would exist.
    let re = Regex::new(
        r#"(?x)
          (?:\+?1)?                       # Country Code Optional
          [\s\.]?
          (([2-9]\d{2})|\(([2-9]\d{2})\)) # Area Code
          [\s\.\-]?
          ([2-9]\d{2})                    # Exchange Code
          [\s\.\-]?
          (\d{4})                         # Subscriber Number"#,
    )?;

    let phone_numbers = re.captures_iter(phone_text).filter_map(|cap| {
        // Group 2 is a bare area code, group 3 a parenthesized one; only one
        // of the two alternatives participates in any given match.
        let area = cap.get(2).or_else(|| cap.get(3))?;
        let exchange = cap.get(4)?;
        let subscriber = cap.get(5)?;
        Some(PhoneNumber {
            area: area.as_str(),
            exchange: exchange.as_str(),
            subscriber: subscriber.as_str(),
        })
    });

    assert_eq!(
        phone_numbers.map(|m| m.to_string()).collect::<Vec<_>>(),
        vec![
            "1 (505) 881-9292",
            "1 (505) 778-2212",
            "1 (505) 881-9297",
            "1 (202) 991-9534",
            "1 (555) 392-0011",
            "1 (800) 233-2010",
            "1 (299) 339-1020",
        ]
    );

    Ok(())
}
Filter a log file by matching multiple regular expressions
Reads a file named `application.log` and only outputs the lines containing "version X.X.X", some IP address followed by port 443 (e.g. "192.168.0.1:443"), or a specific warning.
A `regex::RegexSetBuilder` composes a `regex::RegexSet`. Since backslashes are very common in regular expressions, using raw string literals makes them more readable.
use anyhow::Result;
use regex::RegexSetBuilder;
use std::fs::File;
use std::io::{BufRead, BufReader, ErrorKind};

/// Prints every line of `application.log` that matches at least one of the
/// patterns below, ignoring case.
///
/// Returns an error if the file cannot be opened, the pattern set fails to
/// build, or reading fails for any reason other than undecodable text.
fn main() -> Result<()> {
    let log_path = "application.log";
    let buffered = BufReader::new(File::open(log_path)?);

    let set = RegexSetBuilder::new(&[
        r#"version "\d\.\d\.\d""#,
        r#"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:443"#,
        r#"warning.*timeout expired"#,
    ])
    .case_insensitive(true)
    .build()?;

    for line in buffered.lines() {
        let line = match line {
            Ok(line) => line,
            // A line that is not valid UTF-8 is skipped, as before; the
            // reader has already moved past it.
            Err(err) if err.kind() == ErrorKind::InvalidData => continue,
            // Any other I/O error may repeat on every read, so discarding it
            // could loop forever. Report it instead.
            Err(err) => return Err(err.into()),
        };

        if set.is_match(&line) {
            println!("{}", line);
        }
    }

    Ok(())
}
Replace all occurrences of one text pattern with another pattern.
Replaces all occurrences of the standard ISO 8601 YYYY-MM-DD date pattern with the equivalent American English date with slashes. For example, `2013-01-15` becomes `01/15/2013`.
The method `Regex::replace_all` replaces all occurrences of the whole regex. `&str` implements the `Replacer` trait, which allows variables like `$abcde` to refer to corresponding named capture groups `(?P<abcde>REGEX)` from the search regex. See the replacement string syntax for examples and escaping detail.
use lazy_static::lazy_static;
use regex::Regex;
use std::borrow::Cow;

/// Rewrites every `YYYY-MM-DD` date in `before` as `MM/DD/YYYY`.
///
/// Text outside the matched dates is left as it is.
fn reformat_dates(before: &str) -> Cow<str> {
    lazy_static! {
        // Compiled once on first use and shared by every later call.
        static ref ISO8601_DATE_REGEX: Regex =
            Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
    }

    // `$m`, `$d` and `$y` refer to the named capture groups in the pattern.
    let american_layout = "$m/$d/$y";
    ISO8601_DATE_REGEX.replace_all(before, american_layout)
}

fn main() {
    let iso_text = "2012-03-14, 2013-01-15 and 2014-07-05";
    assert_eq!(
        reformat_dates(iso_text),
        "03/14/2012, 01/15/2013 and 07/05/2014"
    );
}