|
3 | 3 | from .importsupport# noqa |
4 | 4 |
|
5 | 5 | importcodecs |
6 | | -fromioimportBytesIO |
| 6 | +importsys |
| 7 | +fromioimportBytesIO,StringIO |
| 8 | + |
| 9 | +importpytest |
7 | 10 |
|
8 | 11 | importsix |
9 | 12 | fromsix.movesimporthttp_client,urllib |
10 | 13 |
|
11 | 14 | fromhtml5lib.inputstreamimport (BufferedStream,HTMLInputStream, |
12 | 15 | HTMLUnicodeInputStream,HTMLBinaryInputStream) |
| 16 | +fromhtml5lib.utilsimportsupports_lone_surrogates |
13 | 17 |
|
14 | 18 |
|
15 | 19 | deftest_basic(): |
@@ -211,3 +215,109 @@ def makefile(self, _mode, _bufsize=None): |
211 | 215 | wrapped=urllib.response.addinfourl(source,source.msg,"http://example.com") |
212 | 216 | stream=HTMLInputStream(wrapped) |
213 | 217 | assertstream.charsUntil(" ")=="Text" |
| 218 | + |
| 219 | + |
@pytest.mark.parametrize(
    "inp,num",
    [
        # Single code points in the C0 / C1 control range and its borders.
        ("\u0000", 0),
        ("\u0001", 1),
        ("\u0008", 1),
        ("\u0009", 0),
        ("\u000A", 0),
        ("\u000B", 1),
        ("\u000C", 0),
        ("\u000D", 0),
        ("\u000E", 1),
        ("\u001F", 1),
        ("\u0020", 0),
        ("\u007E", 0),
        ("\u007F", 1),
        ("\u009F", 1),
        ("\u00A0", 0),
        # The U+FDD0..U+FDEF range and the code points just outside it.
        ("\uFDCF", 0),
        ("\uFDD0", 1),
        ("\uFDEF", 1),
        ("\uFDF0", 0),
        # The last three code points of the BMP.
        ("\uFFFD", 0),
        ("\uFFFE", 1),
        ("\uFFFF", 1),
        # The last three code points of each supplementary plane (1..16).
        ("\U0001FFFD", 0),
        ("\U0001FFFE", 1),
        ("\U0001FFFF", 1),
        ("\U0002FFFD", 0),
        ("\U0002FFFE", 1),
        ("\U0002FFFF", 1),
        ("\U0003FFFD", 0),
        ("\U0003FFFE", 1),
        ("\U0003FFFF", 1),
        ("\U0004FFFD", 0),
        ("\U0004FFFE", 1),
        ("\U0004FFFF", 1),
        ("\U0005FFFD", 0),
        ("\U0005FFFE", 1),
        ("\U0005FFFF", 1),
        ("\U0006FFFD", 0),
        ("\U0006FFFE", 1),
        ("\U0006FFFF", 1),
        ("\U0007FFFD", 0),
        ("\U0007FFFE", 1),
        ("\U0007FFFF", 1),
        ("\U0008FFFD", 0),
        ("\U0008FFFE", 1),
        ("\U0008FFFF", 1),
        ("\U0009FFFD", 0),
        ("\U0009FFFE", 1),
        ("\U0009FFFF", 1),
        ("\U000AFFFD", 0),
        ("\U000AFFFE", 1),
        ("\U000AFFFF", 1),
        ("\U000BFFFD", 0),
        ("\U000BFFFE", 1),
        ("\U000BFFFF", 1),
        ("\U000CFFFD", 0),
        ("\U000CFFFE", 1),
        ("\U000CFFFF", 1),
        ("\U000DFFFD", 0),
        ("\U000DFFFE", 1),
        ("\U000DFFFF", 1),
        ("\U000EFFFD", 0),
        ("\U000EFFFE", 1),
        ("\U000EFFFF", 1),
        ("\U000FFFFD", 0),
        ("\U000FFFFE", 1),
        ("\U000FFFFF", 1),
        ("\U0010FFFD", 0),
        ("\U0010FFFE", 1),
        ("\U0010FFFF", 1),
        # Several flagged characters in one input, alone and interleaved.
        ("\x01\x01\x01", 3),
        ("a\x01a\x01a\x01a", 3),
    ])
def test_invalid_codepoints(inp, num):
    """Check how many errors the stream records while reading ``inp``.

    ``inp`` is wrapped in a ``StringIO`` and handed to
    ``HTMLUnicodeInputStream``; ``stream.char()`` is then called once per
    element of ``inp``.  Afterwards ``stream.errors`` must hold exactly
    ``num`` entries.
    """
    stream = HTMLUnicodeInputStream(StringIO(inp))
    # One char() call per element of the input, i.e. len(inp) calls in total.
    for _unused in inp:
        stream.char()
    recorded = len(stream.errors)
    assert recorded == num
| 298 | + |
| 299 | + |
@pytest.mark.skipif(not supports_lone_surrogates, reason="doesn't support lone surrogates")
@pytest.mark.parametrize(
    "inp,num",
    [
        ("'\\uD7FF'", 0),
        ("'\\uD800'", 1),
        ("'\\uDBFF'", 1),
        ("'\\uDC00'", 1),
        ("'\\uDFFF'", 1),
        ("'\\uE000'", 0),
        ("'\\uD800\\uD800\\uD800'", 3),
        ("'a\\uD800a\\uD800a\\uD800a'", 3),
        ("'\\uDFFF\\uDBFF'", 2),
        # A per-case mark has to be attached through pytest.param(marks=...):
        # wrapping the argument tuple inside the mark call itself is the
        # legacy spelling that pytest deprecated and later removed.
        pytest.param(
            "'\\uDBFF\\uDFFF'", 2,
            marks=pytest.mark.skipif(sys.maxunicode == 0xFFFF,
                                     reason="narrow Python")),
    ])
def test_invalid_codepoints_surrogates(inp, num):
    """Check how many errors the stream records for surrogate code points.

    ``inp`` is the *source text* of a string literal (quotes and backslash
    escapes included); it is evaluated to obtain the actual input string.
    ``stream.char()`` is then called ``len(inp)`` times on an
    ``HTMLUnicodeInputStream`` over that string, and ``stream.errors`` must
    hold exactly ``num`` entries.

    The whole test is skipped when ``supports_lone_surrogates`` is false, the
    high+low surrogate pair case is skipped when ``sys.maxunicode`` is
    ``0xFFFF``, and any case is skipped at run time if reading the string
    back from ``StringIO`` yields a code point above U+FFFF.
    """
    # eval() only ever sees the hard-coded literals from the table above,
    # never external data.
    # NOTE(review): presumably the cases are stored as source text so the
    # module still imports on interpreters that reject lone surrogates in
    # literals -- confirm.
    inp = eval(inp)  # pylint:disable=eval-used
    fp = StringIO(inp)
    # If StringIO hands back something above the BMP, the string we meant to
    # feed the stream is not what it would see, so the case is meaningless.
    if ord(max(fp.read())) > 0xFFFF:
        pytest.skip("StringIO altered string")
    fp.seek(0)
    stream = HTMLUnicodeInputStream(fp)
    for _i in range(len(inp)):
        stream.char()
    assert len(stream.errors) == num