Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ffe9ba0

Browse files
[3.12] gh-92081: Fix for email.generator.Generator with whitespace between encoded words. (GH-92281) (#119246)
* Fix for email.generator.Generator with whitespace between encoded words. email.generator.Generator currently does not handle whitespace between encoded words correctly when the encoded words span multiple lines. The current generator will create an encoded word for each line. If the end of the line happens to correspond with the end of a real word in the plaintext, the generator will place an unencoded space at the start of the subsequent lines to represent the whitespace between the plaintext words. A compliant decoder will strip all the whitespace from between two encoded words, which leads to missing spaces in the round-tripped output. The fix for this is to make sure that whitespace between two encoded words ends up inside of one or the other of the encoded words. This fix places the space inside of the second encoded word. A second problem happens with continuation lines. A continuation line that starts with whitespace and is followed by a non-encoded word is fine because the newline between such continuation lines is defined as condensing to a single space character. When the continuation line starts with whitespace followed by an encoded word, however, the RFCs specify that the word is run together with the encoded word on the previous line. This is because normal words are folded on syntactic breaks but encoded words are not. The solution to this is to add the whitespace to the start of the encoded word on the continuation line. Test cases are from GH-92081. * Rename a variable so it's not confused with the final variable. (cherry picked from commit a6fdb31) Co-authored-by: Toshio Kuratomi <a.badger@gmail.com>
1 parent 386e492 commit ffe9ba0

File tree

4 files changed

+79
-8
lines changed

4 files changed

+79
-8
lines changed

‎Lib/email/_header_value_parser.py‎

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2784,11 +2784,15 @@ def _refold_parse_tree(parse_tree, *, policy):
27842784
# max_line_length 0/None means no limit, ie: infinitely long.
27852785
maxlen=policy.max_line_lengthorsys.maxsize
27862786
encoding='utf-8'ifpolicy.utf8else'us-ascii'
2787-
lines= ['']
2788-
last_ew=None
2787+
lines= ['']# Folded lines to be output
2788+
leading_whitespace=''# When we have whitespace between two encoded
2789+
# words, we may need to encode the whitespace
2790+
# at the beginning of the second word.
2791+
last_ew=None# Points to the last encoded character if there's an ew on
2792+
# the line
27892793
last_charset=None
27902794
wrap_as_ew_blocked=0
2791-
want_encoding=False
2795+
want_encoding=False# This is set to True if we need to encode this part
27922796
end_ew_not_allowed=Terminal('','wrap_as_ew_blocked')
27932797
parts=list(parse_tree)
27942798
whileparts:
@@ -2812,10 +2816,12 @@ def _refold_parse_tree(parse_tree, *, policy):
28122816
# 'charset' property on the policy.
28132817
charset='utf-8'
28142818
want_encoding=True
2819+
28152820
ifpart.token_type=='mime-parameters':
28162821
# Mime parameter folding (using RFC2231) is extra special.
28172822
_fold_mime_parameters(part,lines,maxlen,encoding)
28182823
continue
2824+
28192825
ifwant_encodingandnotwrap_as_ew_blocked:
28202826
ifnotpart.as_ew_allowed:
28212827
want_encoding=False
@@ -2847,21 +2853,38 @@ def _refold_parse_tree(parse_tree, *, policy):
28472853
last_charset=='utf-8'andcharset!='us-ascii')):
28482854
last_ew=None
28492855
last_ew=_fold_as_ew(tstr,lines,maxlen,last_ew,
2850-
part.ew_combine_allowed,charset)
2856+
part.ew_combine_allowed,charset,leading_whitespace)
2857+
# This whitespace has been added to the lines in _fold_as_ew()
2858+
# so clear it now.
2859+
leading_whitespace=''
28512860
last_charset=charset
28522861
want_encoding=False
28532862
continue
2863+
28542864
iflen(tstr)<=maxlen-len(lines[-1]):
28552865
lines[-1]+=tstr
28562866
continue
2867+
28572868
# This part is too long to fit. The RFC wants us to break at
28582869
# "major syntactic breaks", so unless we don't consider this
28592870
# to be one, check if it will fit on the next line by itself.
2871+
leading_whitespace=''
28602872
if (part.syntactic_breakand
28612873
len(tstr)+1<=maxlen):
28622874
newline=_steal_trailing_WSP_if_exists(lines)
28632875
ifnewlineorpart.startswith_fws():
2876+
# We're going to fold the data onto a new line here. Due to
2877+
# the way encoded strings handle continuation lines, we need to
2878+
# be prepared to encode any whitespace if the next line turns
2879+
# out to start with an encoded word.
28642880
lines.append(newline+tstr)
2881+
2882+
whitespace_accumulator= []
2883+
forcharinlines[-1]:
2884+
ifcharnotinWSP:
2885+
break
2886+
whitespace_accumulator.append(char)
2887+
leading_whitespace=''.join(whitespace_accumulator)
28652888
last_ew=None
28662889
continue
28672890
ifnothasattr(part,'encode'):
@@ -2885,9 +2908,10 @@ def _refold_parse_tree(parse_tree, *, policy):
28852908
else:
28862909
# We can't fold it onto the next line either...
28872910
lines[-1]+=tstr
2911+
28882912
returnpolicy.linesep.join(lines)+policy.linesep
28892913

2890-
def_fold_as_ew(to_encode,lines,maxlen,last_ew,ew_combine_allowed,charset):
2914+
def_fold_as_ew(to_encode,lines,maxlen,last_ew,ew_combine_allowed,charset,leading_whitespace):
28912915
"""Fold string to_encode into lines as encoded word, combining if allowed.
28922916
Return the new value for last_ew, or None if ew_combine_allowed is False.
28932917
@@ -2902,14 +2926,15 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
29022926
to_encode=str(
29032927
get_unstructured(lines[-1][last_ew:]+to_encode))
29042928
lines[-1]=lines[-1][:last_ew]
2905-
ifto_encode[0]inWSP:
2929+
elifto_encode[0]inWSP:
29062930
# We're joining this to non-encoded text, so don't encode
29072931
# the leading blank.
29082932
leading_wsp=to_encode[0]
29092933
to_encode=to_encode[1:]
29102934
if (len(lines[-1])==maxlen):
29112935
lines.append(_steal_trailing_WSP_if_exists(lines))
29122936
lines[-1]+=leading_wsp
2937+
29132938
trailing_wsp=''
29142939
ifto_encode[-1]inWSP:
29152940
# Likewise for the trailing space.
@@ -2929,11 +2954,20 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
29292954

29302955
whileto_encode:
29312956
remaining_space=maxlen-len(lines[-1])
2932-
text_space=remaining_space-chrome_len
2957+
text_space=remaining_space-chrome_len-len(leading_whitespace)
29332958
iftext_space<=0:
29342959
lines.append(' ')
29352960
continue
29362961

2962+
# If we are at the start of a continuation line, prepend whitespace
2963+
# (we only want to do this when the line starts with an encoded word
2964+
# but if we're folding in this helper function, then we know that we
2965+
# are going to be writing out an encoded word.)
2966+
iflen(lines)>1andlen(lines[-1])==1andleading_whitespace:
2967+
encoded_word=_ew.encode(leading_whitespace,charset=encode_as)
2968+
lines[-1]+=encoded_word
2969+
leading_whitespace=''
2970+
29372971
to_encode_word=to_encode[:text_space]
29382972
encoded_word=_ew.encode(to_encode_word,charset=encode_as)
29392973
excess=len(encoded_word)-remaining_space

‎Lib/test/test_email/test_generator.py‎

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,41 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
281281
ioclass=io.BytesIO
282282
typ=lambdaself,x:x.encode('ascii')
283283

284+
deftest_defaults_handle_spaces_between_encoded_words_when_folded(self):
285+
source= ("Уведомление о принятии в работу обращения для"
286+
" подключения услуги")
287+
expected= ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'
288+
' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'
289+
' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')
290+
msg=EmailMessage()
291+
msg['Subject']=source
292+
s=io.BytesIO()
293+
g=BytesGenerator(s)
294+
g.flatten(msg)
295+
self.assertEqual(s.getvalue(),expected)
296+
297+
deftest_defaults_handle_spaces_at_start_of_subject(self):
298+
source=" Уведомление"
299+
expected=b"Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtQ==?=\n\n"
300+
msg=EmailMessage()
301+
msg['Subject']=source
302+
s=io.BytesIO()
303+
g=BytesGenerator(s)
304+
g.flatten(msg)
305+
self.assertEqual(s.getvalue(),expected)
306+
307+
deftest_defaults_handle_spaces_at_start_of_continuation_line(self):
308+
source=" ф ффффффффффффффффффф ф ф"
309+
expected= (b"Subject: "
310+
b"=?utf-8?b?0YQg0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YQ=?=\n"
311+
b" =?utf-8?b?INGEINGE?=\n\n")
312+
msg=EmailMessage()
313+
msg['Subject']=source
314+
s=io.BytesIO()
315+
g=BytesGenerator(s)
316+
g.flatten(msg)
317+
self.assertEqual(s.getvalue(),expected)
318+
284319
deftest_cte_type_7bit_handles_unknown_8bit(self):
285320
source= ("Subject: Maintenant je vous présente mon "
286321
"collègue\n\n").encode('utf-8')

‎Lib/test/test_email/test_headerregistry.py‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
fromtest.test_emailimportTestEmailBase,parameterize
88
fromemailimportheaderregistry
99
fromemail.headerregistryimportAddress,Group
10+
fromemail.headerimportdecode_header
1011
fromtest.supportimportALWAYS_EQ
1112

1213

@@ -1648,7 +1649,7 @@ def test_address_display_names(self):
16481649
'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. '
16491650
'Suspendisse pôtenti. Aliquam nibh. Suspendisse pôtenti.',
16501651
'=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c'
1651-
'=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse'
1652+
'=C3=B4nsectetuer?=\n =?utf-8?q?_adipiscing=2E_Suspendisse'
16521653
'_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8'
16531654
'?q?p=C3=B4tenti=2E?=',
16541655
),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix missing spaces in email headers when the spaces are mixed with encoded 8-bit characters.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp