|
62 | 62 |
|
# regex based decoder.
#
# Replace every '=XX' escape (XX being two hex digits, either case) with the
# single byte it encodes.  The compiled pattern's sub() method is bound once
# with functools.partial, so callers only pass the bytes to be decoded.
_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
        lambda m: bytes.fromhex(m.group(1).decode()))
66 | 66 |
|
67 | 67 | defdecode_q(encoded):
|
68 | 68 | encoded=encoded.replace(b'_',b' ')
|
@@ -98,30 +98,42 @@ def len_q(bstring):
|
98 | 98 | #
|
99 | 99 |
|
def decode_b(encoded):
    """Decode the base64 bytes *encoded*, tolerating malformed input.

    Return a ``(decoded, defects)`` tuple.  *decoded* is the decoded bytes,
    or *encoded* itself, unchanged, when no decoding attempt succeeds.
    *defects* is a list of ``errors`` defect instances describing what had
    to be worked around; it is empty for well-formed input.
    """
    # First try decoding with validate=True, fixing the padding if needed.
    # This will succeed only if encoded includes no invalid characters.
    pad_err = len(encoded) % 4
    missing_padding = b'==='[:4 - pad_err] if pad_err else b''
    try:
        return (
            base64.b64decode(encoded + missing_padding, validate=True),
            [errors.InvalidBase64PaddingDefect()] if pad_err else [],
        )
    except binascii.Error:
        # Since we had correct padding, this is likely an invalid char error.
        #
        # The non-alphabet characters are ignored as far as padding
        # goes, but we don't know how many there are.  So try without adding
        # padding to see if it works.
        try:
            return (
                base64.b64decode(encoded, validate=False),
                [errors.InvalidBase64CharactersDefect()],
            )
        except binascii.Error:
            # Add as much padding as could possibly be necessary (extra padding
            # is ignored).
            try:
                return (
                    base64.b64decode(encoded + b'==', validate=False),
                    [errors.InvalidBase64CharactersDefect(),
                     errors.InvalidBase64PaddingDefect()],
                )
            except binascii.Error:
                # This only happens when the encoded string's length is 1 more
                # than a multiple of 4, which is invalid.
                #
                # bpo-27397: Just return the encoded string since there's no
                # way to decode.
                return encoded, [errors.InvalidBase64LengthDefect()]
125 | 137 |
|
def encode_b(bstring):
    """Return the base64 encoding of the bytes *bstring* as an ASCII str."""
    return base64.b64encode(bstring).decode('ascii')
|
@@ -167,15 +179,15 @@ def decode(ew):
|
167 | 179 | # Turn the CTE decoded bytes into unicode.
|
168 | 180 | try:
|
169 | 181 | string=bstring.decode(charset)
|
170 |
| -exceptUnicodeError: |
| 182 | +exceptUnicodeDecodeError: |
171 | 183 | defects.append(errors.UndecodableBytesDefect("Encoded word "
|
172 |
| -"contains bytes not decodable using {} charset".format(charset))) |
| 184 | +f"contains bytes not decodable using{charset!r} charset")) |
173 | 185 | string=bstring.decode(charset,'surrogateescape')
|
174 |
| -exceptLookupError: |
| 186 | +except(LookupError,UnicodeEncodeError): |
175 | 187 | string=bstring.decode('ascii','surrogateescape')
|
176 | 188 | ifcharset.lower()!='unknown-8bit':
|
177 |
| -defects.append(errors.CharsetError("Unknown charset {} " |
178 |
| -"in encoded word; decoded as unknown bytes".format(charset))) |
| 189 | +defects.append(errors.CharsetError(f"Unknown charset{charset!r} " |
| 190 | +f"in encoded word; decoded as unknown bytes")) |
179 | 191 | returnstring,charset,lang,defects
|
180 | 192 |
|
181 | 193 |
|
|