|
12 | 12 | fromgzipimportGzipFile |
13 | 13 | fromioimportBytesIO,UnsupportedOperation |
14 | 14 | fromdistutils.versionimportStrictVersion |
| 15 | +importhashlib |
| 16 | +importtime |
15 | 17 |
|
16 | 18 | fromnumpy.compat.py3kimportasstr,asbytes |
17 | 19 | from ..openersimport (Opener, |
18 | 20 | ImageOpener, |
19 | 21 | HAVE_INDEXED_GZIP, |
20 | 22 | BZ2File, |
| 23 | +DeterministicGzipFile, |
21 | 24 | ) |
22 | 25 | from ..tmpdirsimportInTemporaryDirectory |
23 | 26 | from ..volumeutilsimportBinOpener |
@@ -367,3 +370,126 @@ def test_iter(): |
367 | 370 | lobj=Opener(Lunk('')) |
368 | 371 | withpytest.raises(TypeError): |
369 | 372 | list(lobj) |
| 373 | + |
| 374 | + |
def md5sum(fname):
    """Return the hexadecimal MD5 digest of the bytes stored in file `fname`."""
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        # The whole file is read in one go; the fixtures hashed here are tiny.
        digest.update(stream.read())
    return digest.hexdigest()
| 378 | + |
| 379 | + |
def test_DeterministicGzipFile():
    """Compare DeterministicGzipFile output with plain GzipFile references.

    The test checks, via MD5 checksums of the written files, that:

    * with default arguments the output equals a GzipFile stream written
      with an empty filename and ``mtime=0``;
    * an explicitly passed ``mtime`` is honoured;
    * the default does not follow ``time.time()`` the way GzipFile does;
    * the name of the file on disk does not influence the output.
    """
    with InTemporaryDirectory():
        payload = b"Hello, I'd like to have an argument."

        # Reference stream: empty filename, zero mtime
        with open("ref.gz", "wb") as raw:
            with GzipFile(filename="", mode="wb", fileobj=raw, mtime=0) as ref:
                ref.write(payload)
        anonymous = md5sum("ref.gz")

        with DeterministicGzipFile("default.gz", "wb") as gz:
            underlying = gz.myfileobj
            gz.write(payload)
        # GzipFile.close() is expected to close myfileobj for us; this guards
        # against a change in GzipFile's internal implementation.
        assert underlying.closed

        assert md5sum("default.gz") == anonymous

        # Reference stream: empty filename, current time as mtime
        stamp = time.time()
        with open("ref.gz", "wb") as raw:
            with GzipFile(filename="", mode="wb", fileobj=raw, mtime=stamp) as ref:
                ref.write(payload)
        stamped = md5sum("ref.gz")

        with DeterministicGzipFile("now.gz", "wb", mtime=stamp) as gz:
            gz.write(payload)

        assert md5sum("now.gz") == stamped

        # Difference in default behaviour, with the clock pinned to `stamp`
        with mock.patch("time.time") as fake_clock:
            fake_clock.return_value = stamp

            # Without an mtime argument GzipFile falls back on time.time()
            with open("ref.gz", "wb") as raw:
                with GzipFile(filename="", mode="wb", fileobj=raw) as ref:
                    ref.write(payload)
            assert md5sum("ref.gz") == stamped

            # ... whereas DeterministicGzipFile falls back on 0
            with DeterministicGzipFile("now.gz", "wb") as gz:
                gz.write(payload)
            assert md5sum("now.gz") == anonymous

        # GzipFile output depends on the filename it is given
        with GzipFile("filenameA.gz", mode="wb", mtime=0) as ref:
            ref.write(payload)
        named = md5sum("filenameA.gz")
        assert named != anonymous

        with DeterministicGzipFile("filenameA.gz", "wb") as gz:
            gz.write(payload)

        # DeterministicGzipFile output is the same whatever the filename
        assert md5sum("filenameA.gz") == anonymous
| 437 | + |
| 438 | + |
def test_DeterministicGzipFile_fileobj():
    """Check DeterministicGzipFile output when writing to an open file object.

    A reference file is written with GzipFile using an empty filename and
    ``mtime=0``.  DeterministicGzipFile is then given an already-open file
    object with an empty filename, with no filename at all, and with a real
    filename; each result must have the same MD5 checksum as the reference.
    """
    with InTemporaryDirectory():
        msg = b"Hello, I'd like to have an argument."
        with open("ref.gz", "wb") as fobj:
            with GzipFile(filename="", mode="wb", fileobj=fobj, mtime=0) as gzobj:
                gzobj.write(msg)
        ref_chksum = md5sum("ref.gz")

        # Explicit empty filename
        with open("test.gz", "wb") as fobj:
            with DeterministicGzipFile(filename="", mode="wb", fileobj=fobj) as gzobj:
                gzobj.write(msg)
        # The comparisons below were bare expressions before, so a checksum
        # mismatch could never fail the test; they are now asserted.
        assert md5sum("test.gz") == ref_chksum

        # Filename omitted entirely
        with open("test.gz", "wb") as fobj:
            with DeterministicGzipFile(fileobj=fobj, mode="wb") as gzobj:
                gzobj.write(msg)
        assert md5sum("test.gz") == ref_chksum

        # A filename passed alongside fileobj must not change the output
        with open("test.gz", "wb") as fobj:
            with DeterministicGzipFile(filename="test.gz", mode="wb", fileobj=fobj) as gzobj:
                gzobj.write(msg)
        assert md5sum("test.gz") == ref_chksum
| 461 | + |
| 462 | + |
def test_bitwise_determinism():
    """Check that Opener writes identical gzip files whatever the name or clock.

    Files written through Opener at different (mocked) times and under
    different names must match a canonical GzipFile stream byte for byte.
    When the caller passes an explicit ``mtime``, the output must differ from
    the canonical stream but still be independent of the filename.
    """
    with InTemporaryDirectory():
        payload = b"Hello, I'd like to have an argument."
        # Canonical reference: empty filename, zero mtime.
        # compresslevel=1 is the level the Opener output below is expected
        # to match when no level is given.
        with open("ref.gz", "wb") as raw:
            with GzipFile(filename="", mode="wb", compresslevel=1,
                          fileobj=raw, mtime=0) as ref:
                ref.write(payload)
        canonical = md5sum("ref.gz")

        # Two files, two names, two different readings of the clock
        start = time.time()
        with mock.patch("time.time") as fake_clock:
            fake_clock.return_value = start
            with Opener("a.gz", "wb") as out:
                out.write(payload)
            fake_clock.return_value = start + 1
            with Opener("b.gz", "wb") as out:
                out.write(payload)

        assert md5sum("a.gz") == canonical
        assert md5sum("b.gz") == canonical

        # An explicit mtime is still honoured, but the filename is not embedded
        for name in ("filenameA.gz", "filenameB.gz"):
            with Opener(name, "wb", mtime=0xCAFE10C0) as out:
                out.write(payload)
        chksum_a = md5sum("filenameA.gz")
        chksum_b = md5sum("filenameB.gz")
        assert chksum_a == chksum_b
        assert chksum_b != canonical