Commit ffe9ba0

miss-islington

and

abadger

authored

[3.12] gh-92081: Fix for email.generator.Generator with whitespace between encoded words. (GH-92281) (#119246)

* Fix for email.generator.Generator with whitespace between encoded words. email.generator.Generator currently does not handle whitespace between encoded words correctly when the encoded words span multiple lines. The current generator will create an encoded word for each line. If the end of the line happens to correspond with the end of a real word in the plaintext, the generator will place an unencoded space at the start of the subsequent lines to represent the whitespace between the plaintext words. A compliant decoder will strip all the whitespace from between two encoded words, which leads to missing spaces in the round-tripped output. The fix for this is to make sure that whitespace between two encoded words ends up inside of one or the other of the encoded words. This fix places the space inside of the second encoded word. A second problem happens with continuation lines. A continuation line that starts with whitespace and is followed by a non-encoded word is fine because the newline between such continuation lines is defined as condensing to a single space character. When the continuation line starts with whitespace followed by an encoded word, however, the RFCs specify that the word is run together with the encoded word on the previous line. This is because normal words are folded on syntactic breaks but encoded words are not. The solution to this is to add the whitespace to the start of the encoded word on the continuation line. Test cases are from GH-92081. * Rename a variable so it's not confused with the final variable. (cherry picked from commit a6fdb31) Co-authored-by: Toshio Kuratomi <a.badger@gmail.com>

1 parent 386e492, commit ffe9ba0. Copy full SHA for ffe9ba0

File tree

4 files changed

+79

-8

lines changed

Lib
- email
  - _header_value_parser.py
- test/test_email
  - test_generator.py
  - test_headerregistry.py
Misc/NEWS.d/next/Library
- 2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst

4 files changed

+79

-8

lines changed

`‎Lib/email/_header_value_parser.py‎`

Lines changed: 41 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -2784,11 +2784,15 @@ def _refold_parse_tree(parse_tree, *, policy):`
`2784`	`2784`	`# max_line_length 0/None means no limit, ie: infinitely long.`
`2785`	`2785`	`maxlen=policy.max_line_lengthorsys.maxsize`
`2786`	`2786`	`encoding='utf-8'ifpolicy.utf8else'us-ascii'`
`2787`		`-lines= ['']`
`2788`		`-last_ew=None`
	`2787`	`+lines= ['']# Folded lines to be output`
	`2788`	`+leading_whitespace=''# When we have whitespace between two encoded`
	`2789`	`+# words, we may need to encode the whitespace`
	`2790`	`+# at the beginning of the second word.`
	`2791`	`+last_ew=None# Points to the last encoded character if there's an ew on`
	`2792`	`+# the line`
`2789`	`2793`	`last_charset=None`
`2790`	`2794`	`wrap_as_ew_blocked=0`
`2791`		`-want_encoding=False`
	`2795`	`+want_encoding=False# This is set to True if we need to encode this part`
`2792`	`2796`	`end_ew_not_allowed=Terminal('','wrap_as_ew_blocked')`
`2793`	`2797`	`parts=list(parse_tree)`
`2794`	`2798`	`whileparts:`
`@@ -2812,10 +2816,12 @@ def _refold_parse_tree(parse_tree, *, policy):`
`2812`	`2816`	`# 'charset' property on the policy.`
`2813`	`2817`	`charset='utf-8'`
`2814`	`2818`	`want_encoding=True`
	`2819`	`+`
`2815`	`2820`	`ifpart.token_type=='mime-parameters':`
`2816`	`2821`	`# Mime parameter folding (using RFC2231) is extra special.`
`2817`	`2822`	`_fold_mime_parameters(part,lines,maxlen,encoding)`
`2818`	`2823`	`continue`
	`2824`	`+`
`2819`	`2825`	`ifwant_encodingandnotwrap_as_ew_blocked:`
`2820`	`2826`	`ifnotpart.as_ew_allowed:`
`2821`	`2827`	`want_encoding=False`
`@@ -2847,21 +2853,38 @@ def _refold_parse_tree(parse_tree, *, policy):`
`2847`	`2853`	`last_charset=='utf-8'andcharset!='us-ascii')):`
`2848`	`2854`	`last_ew=None`
`2849`	`2855`	`last_ew=_fold_as_ew(tstr,lines,maxlen,last_ew,`
`2850`		`-part.ew_combine_allowed,charset)`
	`2856`	`+part.ew_combine_allowed,charset,leading_whitespace)`
	`2857`	`+# This whitespace has been added to the lines in _fold_as_ew()`
	`2858`	`+# so clear it now.`
	`2859`	`+leading_whitespace=''`
`2851`	`2860`	`last_charset=charset`
`2852`	`2861`	`want_encoding=False`
`2853`	`2862`	`continue`
	`2863`	`+`
`2854`	`2864`	`iflen(tstr)<=maxlen-len(lines[-1]):`
`2855`	`2865`	`lines[-1]+=tstr`
`2856`	`2866`	`continue`
	`2867`	`+`
`2857`	`2868`	`# This part is too long to fit. The RFC wants us to break at`
`2858`	`2869`	`# "major syntactic breaks", so unless we don't consider this`
`2859`	`2870`	`# to be one, check if it will fit on the next line by itself.`
	`2871`	`+leading_whitespace=''`
`2860`	`2872`	`if (part.syntactic_breakand`
`2861`	`2873`	`len(tstr)+1<=maxlen):`
`2862`	`2874`	`newline=_steal_trailing_WSP_if_exists(lines)`
`2863`	`2875`	`ifnewlineorpart.startswith_fws():`
	`2876`	`+# We're going to fold the data onto a new line here. Due to`
	`2877`	`+# the way encoded strings handle continuation lines, we need to`
	`2878`	`+# be prepared to encode any whitespace if the next line turns`
	`2879`	`+# out to start with an encoded word.`
`2864`	`2880`	`lines.append(newline+tstr)`
	`2881`	`+`
	`2882`	`+whitespace_accumulator= []`
	`2883`	`+forcharinlines[-1]:`
	`2884`	`+ifcharnotinWSP:`
	`2885`	`+break`
	`2886`	`+whitespace_accumulator.append(char)`
	`2887`	`+leading_whitespace=''.join(whitespace_accumulator)`
`2865`	`2888`	`last_ew=None`
`2866`	`2889`	`continue`
`2867`	`2890`	`ifnothasattr(part,'encode'):`
`@@ -2885,9 +2908,10 @@ def _refold_parse_tree(parse_tree, *, policy):`
`2885`	`2908`	`else:`
`2886`	`2909`	`# We can't fold it onto the next line either...`
`2887`	`2910`	`lines[-1]+=tstr`
	`2911`	`+`
`2888`	`2912`	`returnpolicy.linesep.join(lines)+policy.linesep`
`2889`	`2913`
`2890`		`-def_fold_as_ew(to_encode,lines,maxlen,last_ew,ew_combine_allowed,charset):`
	`2914`	`+def_fold_as_ew(to_encode,lines,maxlen,last_ew,ew_combine_allowed,charset,leading_whitespace):`
`2891`	`2915`	`"""Fold string to_encode into lines as encoded word, combining if allowed.`
`2892`	`2916`	`Return the new value for last_ew, or None if ew_combine_allowed is False.`
`2893`	`2917`
`@@ -2902,14 +2926,15 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):`
`2902`	`2926`	`to_encode=str(`
`2903`	`2927`	`get_unstructured(lines[-1][last_ew:]+to_encode))`
`2904`	`2928`	`lines[-1]=lines[-1][:last_ew]`
`2905`		`-ifto_encode[0]inWSP:`
	`2929`	`+elifto_encode[0]inWSP:`
`2906`	`2930`	`# We're joining this to non-encoded text, so don't encode`
`2907`	`2931`	`# the leading blank.`
`2908`	`2932`	`leading_wsp=to_encode[0]`
`2909`	`2933`	`to_encode=to_encode[1:]`
`2910`	`2934`	`if (len(lines[-1])==maxlen):`
`2911`	`2935`	`lines.append(_steal_trailing_WSP_if_exists(lines))`
`2912`	`2936`	`lines[-1]+=leading_wsp`
	`2937`	`+`
`2913`	`2938`	`trailing_wsp=''`
`2914`	`2939`	`ifto_encode[-1]inWSP:`
`2915`	`2940`	`# Likewise for the trailing space.`
`@@ -2929,11 +2954,20 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):`
`2929`	`2954`
`2930`	`2955`	`whileto_encode:`
`2931`	`2956`	`remaining_space=maxlen-len(lines[-1])`
`2932`		`-text_space=remaining_space-chrome_len`
	`2957`	`+text_space=remaining_space-chrome_len-len(leading_whitespace)`
`2933`	`2958`	`iftext_space<=0:`
`2934`	`2959`	`lines.append(' ')`
`2935`	`2960`	`continue`
`2936`	`2961`
	`2962`	`+# If we are at the start of a continuation line, prepend whitespace`
	`2963`	`+# (we only want to do this when the line starts with an encoded word`
	`2964`	`+# but if we're folding in this helper function, then we know that we`
	`2965`	`+# are going to be writing out an encoded word.)`
	`2966`	`+iflen(lines)>1andlen(lines[-1])==1andleading_whitespace:`
	`2967`	`+encoded_word=_ew.encode(leading_whitespace,charset=encode_as)`
	`2968`	`+lines[-1]+=encoded_word`
	`2969`	`+leading_whitespace=''`
	`2970`	`+`
`2937`	`2971`	`to_encode_word=to_encode[:text_space]`
`2938`	`2972`	`encoded_word=_ew.encode(to_encode_word,charset=encode_as)`
`2939`	`2973`	`excess=len(encoded_word)-remaining_space`

`‎Lib/test/test_email/test_generator.py‎`

Lines changed: 35 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -281,6 +281,41 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):`
`281`	`281`	`ioclass=io.BytesIO`
`282`	`282`	`typ=lambdaself,x:x.encode('ascii')`
`283`	`283`
	`284`	`+deftest_defaults_handle_spaces_between_encoded_words_when_folded(self):`
	`285`	`+source= ("Уведомление о принятии в работу обращения для"`
	`286`	`+" подключения услуги")`
	`287`	`+expected= ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'`
	`288`	`+' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'`
	`289`	`+' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')`
	`290`	`+msg=EmailMessage()`
	`291`	`+msg['Subject']=source`
	`292`	`+s=io.BytesIO()`
	`293`	`+g=BytesGenerator(s)`
	`294`	`+g.flatten(msg)`
	`295`	`+self.assertEqual(s.getvalue(),expected)`
	`296`	`+`
	`297`	`+deftest_defaults_handle_spaces_at_start_of_subject(self):`
	`298`	`+source=" Уведомление"`
	`299`	`+expected=b"Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtQ==?=\n\n"`
	`300`	`+msg=EmailMessage()`
	`301`	`+msg['Subject']=source`
	`302`	`+s=io.BytesIO()`
	`303`	`+g=BytesGenerator(s)`
	`304`	`+g.flatten(msg)`
	`305`	`+self.assertEqual(s.getvalue(),expected)`
	`306`	`+`
	`307`	`+deftest_defaults_handle_spaces_at_start_of_continuation_line(self):`
	`308`	`+source=" ф ффффффффффффффффффф ф ф"`
	`309`	`+expected= (b"Subject: "`
	`310`	`+b"=?utf-8?b?0YQg0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YQ=?=\n"`
	`311`	`+b" =?utf-8?b?INGEINGE?=\n\n")`
	`312`	`+msg=EmailMessage()`
	`313`	`+msg['Subject']=source`
	`314`	`+s=io.BytesIO()`
	`315`	`+g=BytesGenerator(s)`
	`316`	`+g.flatten(msg)`
	`317`	`+self.assertEqual(s.getvalue(),expected)`
	`318`	`+`
`284`	`319`	`deftest_cte_type_7bit_handles_unknown_8bit(self):`
`285`	`320`	`source= ("Subject: Maintenant je vous présente mon "`
`286`	`321`	`"collègue\n\n").encode('utf-8')`

`‎Lib/test/test_email/test_headerregistry.py‎`

Lines changed: 2 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@`
`7`	`7`	`fromtest.test_emailimportTestEmailBase,parameterize`
`8`	`8`	`fromemailimportheaderregistry`
`9`	`9`	`fromemail.headerregistryimportAddress,Group`
	`10`	`+fromemail.headerimportdecode_header`
`10`	`11`	`fromtest.supportimportALWAYS_EQ`
`11`	`12`
`12`	`13`
`@@ -1648,7 +1649,7 @@ def test_address_display_names(self):`
`1648`	`1649`	`'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. '`
`1649`	`1650`	`'Suspendisse pôtenti. Aliquam nibh. Suspendisse pôtenti.',`
`1650`	`1651`	`'=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c'`
`1651`		`-'=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse'`
	`1652`	`+'=C3=B4nsectetuer?=\n =?utf-8?q?_adipiscing=2E_Suspendisse'`
`1652`	`1653`	`'_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8'`
`1653`	`1654`	`'?q?p=C3=B4tenti=2E?=',`
`1654`	`1655`	`),`

`‎Misc/NEWS.d/next/Library/2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst‎`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Fix missing spaces in email headers when the spaces are mixed with encoded 8-bit characters.`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit ffe9ba0

File tree

4 files changed

4 files changed

`‎Lib/email/_header_value_parser.py‎`

`‎Lib/test/test_email/test_generator.py‎`

`‎Lib/test/test_email/test_headerregistry.py‎`

`‎Misc/NEWS.d/next/Library/2023-04-26-22-24-17.gh-issue-92081.V8xMot.rst‎`

0 commit comments