Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 32.3k
gh-83938, gh-122476: Stop incorrectly RFC 2047 encoding non-ASCII email addresses #122540
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:main
Are you sure you want to change the base?
Uh oh!
There was an error while loading. Please reload this page.
Changes from 4 commits
0abca3e
faa4006
f8f5eef
99914e3
819c0bc
bd6845d
61ac0d7
43eaea1
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -2829,6 +2829,17 @@ def _refold_parse_tree(parse_tree, *, policy): | ||
_fold_mime_parameters(part, lines, maxlen, encoding) | ||
continue | ||
if want_encoding and part.token_type == 'addr-spec': | ||
# RFC2047 forbids encoded-word in any part of an addr-spec. | ||
if charset == 'unknown-8bit': | ||
# Non-ASCII addr-spec came from parsed message; leave unchanged. | ||
want_encoding = False | ||
else: | ||
raise ValueError( | ||
medmunds marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
"Non-ASCII address requires policy with utf8=True:" | ||
" '{}'".format(part) | ||
medmunds marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
) | ||
if want_encoding and not wrap_as_ew_blocked: | ||
if not part.as_ew_allowed: | ||
want_encoding = False | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import io | ||
import re | ||
import textwrap | ||
import unittest | ||
from email import message_from_string, message_from_bytes | ||
@@ -288,6 +289,28 @@ def test_keep_long_encoded_newlines(self): | ||
g.flatten(msg) | ||
self.assertEqual(s.getvalue(), self.typ(expected)) | ||
def test_non_ascii_addr_spec_raises(self): | ||
# RFC2047 encoded-word is not permitted in any part of an addr-spec. | ||
# (See also test_non_ascii_addr_spec_preserved below.) | ||
g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False)) | ||
cases = [ | ||
'wők@example.com', | ||
'wok@exàmple.com', | ||
'wők@exàmple.com', | ||
'"Name, for display" <wők@example.com>', | ||
'Näyttönimi <wők@example.com>', | ||
] | ||
for address in cases: | ||
with self.subTest(address=address): | ||
msg = EmailMessage() | ||
msg['To'] = address | ||
expected_error = re.escape( | ||
"Non-ASCII address requires policy with utf8=True:" | ||
" '{}'".format(msg['To'].addresses[0].addr_spec) | ||
medmunds marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
) | ||
with self.assertRaisesRegex(ValueError, expected_error): | ||
g.flatten(msg) | ||
class TestGenerator(TestGeneratorBase, TestEmailBase): | ||
@@ -432,12 +455,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self): | ||
def test_smtputf8_policy(self): | ||
msg = EmailMessage() | ||
msg['From'] = "Páolo <főo@bàr.com>" | ||
msg['To'] = 'Dinsdale' | ||
msg['Subject'] = 'Nudge nudge, wink, wink \u1F609' | ||
msg.set_content("oh là là, know what I mean, know what I mean?") | ||
expected = textwrap.dedent("""\ | ||
From: Páolo <főo@bàr.com> | ||
To: Dinsdale | ||
Subject: Nudge nudge, wink, wink \u1F609 | ||
Content-Type: text/plain; charset="utf-8" | ||
@@ -472,6 +495,37 @@ def test_smtp_policy(self): | ||
g.flatten(msg) | ||
self.assertEqual(s.getvalue(), expected) | ||
def test_non_ascii_addr_spec_preserved(self): | ||
# A defective non-ASCII addr-spec parsed from the original | ||
# message is left unchanged when flattening. | ||
# (See also test_non_ascii_addr_spec_raises above.) | ||
source = ( | ||
'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>' | ||
).encode() | ||
expected = ( | ||
b'To: j\xc3\xb6rg@example.com,\n' | ||
b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n' | ||
b'\n' | ||
) | ||
msg = message_from_bytes(source, policy=policy.default) | ||
s = io.BytesIO() | ||
g = BytesGenerator(s, policy=policy.default) | ||
g.flatten(msg) | ||
self.assertEqual(s.getvalue(), expected) | ||
def test_idna_encoding_preserved(self): | ||
# Nothing tries to decode a pre-encoded IDNA domain. | ||
msg = EmailMessage() | ||
msg["To"] = Address( | ||
username='jörg', | ||
domain='☕.example'.encode('idna').decode() # IDNA 2003 | ||
) | ||
expected = 'To: jörg@xn--53h.example\n\n'.encode() | ||
s = io.BytesIO() | ||
g = BytesGenerator(s, policy=policy.default.clone(utf8=True)) | ||
g.flatten(msg) | ||
self.assertEqual(s.getvalue(), expected) | ||
if __name__ == '__main__': | ||
unittest.main() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Stop incorrectly using RFC 2047 "encoded words" for email addresses with | ||
non-ASCII characters when email.generator is called using a policy with | ||
``utf8=False``. | ||
medmunds marked this conversation as resolved. Show resolved Hide resolved Uh oh! There was an error while loading. Please reload this page. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Stop incorrectly using RFC 2047 "encoded words" for email addresses with | ||
non-ASCII characters when email.generator is called using a policy with | ||
``utf8=False``. |
Uh oh!
There was an error while loading. Please reload this page.