Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitab06a8b

Browse files
committed
fixed: is_email is now compliant with email specifications
1 parentad3c497 commitab06a8b

File tree

3 files changed

+146
-13
lines changed

3 files changed

+146
-13
lines changed

‎string_utils/_regex.py‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222

2323
URLS_RE=re.compile(r'({})'.format(URLS_RAW_STRING),re.IGNORECASE)
2424

25-
EMAILS_RAW_STRING=r'[a-zA-Z\d._+-]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}'
25+
ESCAPED_AT_SIGN=re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')
26+
27+
EMAILS_RAW_STRING=r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"
2628

2729
EMAIL_RE=re.compile(r'^{}$'.format(EMAILS_RAW_STRING))
2830

‎string_utils/validation.py‎

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -200,15 +200,22 @@ def is_url(input_string: Any, allowed_schemes: Optional[List[str]] = None) -> bo
200200
returnvalid
201201

202202

203+
# todo: fix me
204+
'''
205+
That limit is a maximum of 64 characters (octets)
206+
in the "local part" (before the "@") and a maximum of 255 characters
207+
(octets) in the domain part (after the "@") for a total length of 320
208+
characters. Systems that handle email should be prepared to process
209+
addresses which are that long, even though they are rarely
210+
encountered.
211+
'''
212+
213+
203214
defis_email(input_string:Any)->bool:
204215
"""
205-
Check if a string is an email.
216+
Check if a string is a valid email.
206217
207-
By design, the implementation of this checking does not strictly follow the specification for a valid\
208-
email address, but instead it's based on real world cases in order to match more than 99%\
209-
of emails and catch user mistakes. For example the percentage sign "%" is a valid sign for an email,\
210-
but actually no one use it, instead if such sign is found in a string coming from user input (like a\
211-
web form) it's very likely that it's a mistake.
218+
Reference: https://tools.ietf.org/html/rfc3696#section-3
212219
213220
*Examples:*
214221
@@ -219,7 +226,36 @@ def is_email(input_string: Any) -> bool:
219226
:type input_string: str
220227
:return: True if email, false otherwise.
221228
"""
222-
returnis_full_string(input_string)andEMAIL_RE.match(input_string)isnotNone
229+
# first simple "pre check": it must be a non empty string with max len 320 and cannot start with a dot
230+
ifnotis_full_string(input_string)orlen(input_string)>320orinput_string.startswith('.'):
231+
returnFalse
232+
233+
try:
234+
# we expect 2 tokens, one before "@" and one after, otherwise we have an exception and the email is not valid
235+
head,tail=input_string.split('@')
236+
237+
# removes escaped spaces, so that later on the test regex will accept the string
238+
head=head.replace('\\ ','')
239+
ifhead.startswith('"')andhead.endswith('"'):
240+
head=head.replace(' ','')[1:-1]
241+
242+
ifhead.endswith('.')orlen(head)>64orlen(tail)>255:
243+
returnFalse
244+
245+
# multiple consecutive dots are forbidden
246+
if'..'inhead:
247+
returnFalse
248+
249+
returnEMAIL_RE.match(head+'@'+tail)isnotNone
250+
251+
exceptValueError:
252+
# borderline case in which we have multiple "@" signs but the head part is correctly escaped
253+
ifESCAPED_AT_SIGN.search(input_string)isnotNone:
254+
# replace "@" with "a" in the head
255+
sanitized=ESCAPED_AT_SIGN.sub('a',input_string)
256+
returnis_email(sanitized)
257+
258+
returnFalse
223259

224260

225261
defis_credit_card(input_string:Any,card_type:str=None)->bool:

‎tests/test_is_email.py‎

Lines changed: 100 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,9 @@ def test_domain_extension_should_be_letters_only_from_2_to_4_chars(self):
4444
self.assertFalse(is_email('me@foo.___'))
4545
self.assertFalse(is_email('me@foo.toolongext'))
4646

47-
deftest_name_part_cannot_contain_bad_signs(self):
48-
self.assertFalse(is_email('#me#@foo.com'))
49-
self.assertFalse(is_email('me!@foo.com'))
50-
self.assertFalse(is_email('[][]@foo.com'))
51-
self.assertFalse(is_email('john%@john5music.net'))
47+
deftest_name_part_cannot_contain_suqare_brackets(self):
48+
self.assertFalse(is_email('[myemail@foo.com'))
49+
self.assertFalse(is_email('my]email@foo.com'))
5250

5351
deftest_domain_part_cannot_contain_bad_signs(self):
5452
self.assertFalse(is_email('me@#foo#.com'))
@@ -74,3 +72,100 @@ def test_should_accept_valid_emails(self):
7472
self.assertTrue(is_email('foo@domamin.subdomain.com'))
7573
self.assertTrue(is_email('is1email@domain.org'))
7674
self.assertTrue(is_email('UPPER_CASE_EMAIL@somesite.com'))
75+
76+
deftest_max_email_length_is_respected(self):
77+
invalid_email= ('a'*320)+'@gmail.com'
78+
self.assertFalse(is_email(invalid_email))
79+
80+
deftest_local_part_length_is_respected(self):
81+
# max local part is 64 (before "@")
82+
invalid_email= ('a'*65)+'@gmail.com'
83+
self.assertFalse(is_email(invalid_email))
84+
85+
deftest_octects_part_length_is_respected(self):
86+
# max domain part is 255 octets (after "@")
87+
invalid_email='a@{}.com'.format(255*'x')
88+
self.assertFalse(is_email(invalid_email))
89+
90+
deftest_plus_is_valid_char_in_local_part(self):
91+
self.assertTrue(is_email("my+mail@gmail.com"))
92+
93+
deftest_minus_is_valid_char_in_local_part(self):
94+
self.assertTrue(is_email("my-mail@gmail.com"))
95+
96+
deftest_slash_is_valid_char_in_local_part(self):
97+
self.assertTrue(is_email("my/mail@gmail.com"))
98+
99+
deftest_back_slash_is_valid_char_in_local_part(self):
100+
self.assertTrue(is_email("my\\mail@gmail.com"))
101+
102+
deftest_equal_is_valid_char_in_local_part(self):
103+
self.assertTrue(is_email("my=mail@gmail.com"))
104+
105+
deftest_question_mark_is_valid_char_in_local_part(self):
106+
self.assertTrue(is_email("my?mail@gmail.com"))
107+
108+
deftest_sharp_is_valid_char_in_local_part(self):
109+
self.assertTrue(is_email("my#mail@gmail.com"))
110+
111+
deftest_dollar_is_valid_char_in_local_part(self):
112+
self.assertTrue(is_email("my$mail@gmail.com"))
113+
114+
deftest_and_is_valid_char_in_local_part(self):
115+
self.assertTrue(is_email("my&mail@gmail.com"))
116+
117+
deftest_asterisk_is_valid_char_in_local_part(self):
118+
self.assertTrue(is_email("my*mail@gmail.com"))
119+
120+
deftest_apostrophe_is_valid_char_in_local_part(self):
121+
self.assertTrue(is_email("my'mail@gmail.com"))
122+
123+
deftest_acute_accent_is_valid_char_in_local_part(self):
124+
self.assertTrue(is_email("my`mail@gmail.com"))
125+
126+
deftest_percentage_is_valid_char_in_local_part(self):
127+
self.assertTrue(is_email("my%mail@gmail.com"))
128+
129+
deftest_exclamation_mark_is_valid_char_in_local_part(self):
130+
self.assertTrue(is_email("my!mail@gmail.com"))
131+
132+
deftest_caret_is_valid_char_in_local_part(self):
133+
self.assertTrue(is_email("my^mail@gmail.com"))
134+
135+
deftest_pipe_is_valid_char_in_local_part(self):
136+
self.assertTrue(is_email("my|mail@gmail.com"))
137+
138+
deftest_tilde_is_valid_char_in_local_part(self):
139+
self.assertTrue(is_email("my~mail@gmail.com"))
140+
141+
deftest_curly_braces_are_valid_char_in_local_part(self):
142+
self.assertTrue(is_email("my{mail@gmail.com"))
143+
self.assertTrue(is_email("my}mail@gmail.com"))
144+
self.assertTrue(is_email("{mymail}@gmail.com"))
145+
146+
deftest_local_part_cannot_start_with_period(self):
147+
self.assertFalse(is_email('.myemail@gmail.com'))
148+
149+
deftest_local_part_cannot_end_with_period(self):
150+
self.assertFalse(is_email('myemail.@gmail.com'))
151+
152+
deftest_local_part_cannot_have_multiple_consecutive_periods(self):
153+
self.assertFalse(is_email('my..email@gmail.com'))
154+
self.assertFalse(is_email('my.email...nope@gmail.com'))
155+
156+
deftest_empty_spaces_are_allowed_only_if_escaped(self):
157+
self.assertFalse(is_email('my mail@gmail.com'))
158+
self.assertTrue(is_email('my\\ mail@gmail.com'))
159+
self.assertTrue(is_email('"my mail"@gmail.com'))
160+
161+
deftest_local_part_can_be_quoted(self):
162+
self.assertTrue(is_email('"foo"@example.com'))
163+
164+
deftest_with_quoted_string_multiple_at_are_accepted(self):
165+
self.assertTrue(is_email('"Abc@def"@example.com'))
166+
167+
deftest_with_escape_multiple_at_are_accepted(self):
168+
self.assertTrue(is_email('Abc\\@def@example.com'))
169+
170+
deftest_local_part_can_have_self_escape(self):
171+
self.assertTrue(is_email('Joe.\\\\Blow@example.com'))

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp