@@ -349,18 +349,19 @@ def _encode(t):
349
349
split = (scheme ,)+ split
350
350
self .checkRoundtrips (url ,parsed ,split )
351
351
352
def checkJoin(self, base, relurl, expected, *, relroundtrip=True):
    """Assert that urljoin(base, relurl) equals expected, for str and bytes.

    The checks run inside a subTest labelled with *base* and *relurl*.
    All three arguments are str and must be ASCII-encodable: each
    assertion is repeated on their ``encode('ascii')`` forms.

    If *relroundtrip* is true (the default), *relurl* is also passed
    through ``urlunsplit(urlsplit(relurl))`` and the join is asserted
    again with the rebuilt reference, for both str and bytes.  Pass
    ``relroundtrip=False`` to skip that second pass for a reference
    that should only be joined exactly as written.
    """
    urljoin = urllib.parse.urljoin
    urlsplit = urllib.parse.urlsplit
    urlunsplit = urllib.parse.urlunsplit

    with self.subTest(base=base, relurl=relurl):
        # Join the reference exactly as given, first as str then as bytes.
        self.assertEqual(urljoin(base, relurl), expected)
        baseb = base.encode('ascii')
        relurlb = relurl.encode('ascii')
        expectedb = expected.encode('ascii')
        self.assertEqual(urljoin(baseb, relurlb), expectedb)

        if relroundtrip:
            # Rebuild the reference from its split components and check
            # that joining the rebuilt form gives the same result.
            relurl = urlunsplit(urlsplit(relurl))
            self.assertEqual(urljoin(base, relurl), expected)
            relurlb = urlunsplit(urlsplit(relurlb))
            self.assertEqual(urljoin(baseb, relurlb), expectedb)
364
365
365
366
def test_unparse_parse (self ):
366
367
str_cases = ['Python' ,'./Python' ,'x-newscheme://foo.com/stuff' ,'x://y' ,'x:/y' ,'x:/' ,'/' ,]
@@ -526,8 +527,6 @@ def test_RFC3986(self):
526
527
527
528
def test_urljoins (self ):
528
529
self .checkJoin (SIMPLE_BASE ,'g:h' ,'g:h' )
529
- self .checkJoin (SIMPLE_BASE ,'http:g' ,'http://a/b/c/g' )
530
- self .checkJoin (SIMPLE_BASE ,'http:' ,'http://a/b/c/d' )
531
530
self .checkJoin (SIMPLE_BASE ,'g' ,'http://a/b/c/g' )
532
531
self .checkJoin (SIMPLE_BASE ,'./g' ,'http://a/b/c/g' )
533
532
self .checkJoin (SIMPLE_BASE ,'g/' ,'http://a/b/c/g/' )
@@ -548,8 +547,6 @@ def test_urljoins(self):
548
547
self .checkJoin (SIMPLE_BASE ,'g/./h' ,'http://a/b/c/g/h' )
549
548
self .checkJoin (SIMPLE_BASE ,'g/../h' ,'http://a/b/c/h' )
550
549
self .checkJoin (SIMPLE_BASE ,'http:g' ,'http://a/b/c/g' )
551
- self .checkJoin (SIMPLE_BASE ,'http:' ,'http://a/b/c/d' )
552
- self .checkJoin (SIMPLE_BASE ,'http:?y' ,'http://a/b/c/d?y' )
553
550
self .checkJoin (SIMPLE_BASE ,'http:g?y' ,'http://a/b/c/g?y' )
554
551
self .checkJoin (SIMPLE_BASE ,'http:g?y/./x' ,'http://a/b/c/g?y/./x' )
555
552
self .checkJoin ('http:///' ,'..' ,'http:///' )
@@ -579,6 +576,53 @@ def test_urljoins(self):
579
576
# issue 23703: don't duplicate filename
580
577
self .checkJoin ('a' ,'b' ,'b' )
581
578
579
+ # Test with empty (but defined) components.
580
+ self .checkJoin (RFC1808_BASE ,'' ,'http://a/b/c/d;p?q#f' )
581
+ self .checkJoin (RFC1808_BASE ,'#' ,'http://a/b/c/d;p?q#' ,relroundtrip = False )
582
+ self .checkJoin (RFC1808_BASE ,'#z' ,'http://a/b/c/d;p?q#z' )
583
+ self .checkJoin (RFC1808_BASE ,'?' ,'http://a/b/c/d;p?' ,relroundtrip = False )
584
+ self .checkJoin (RFC1808_BASE ,'?#z' ,'http://a/b/c/d;p?#z' ,relroundtrip = False )
585
+ self .checkJoin (RFC1808_BASE ,'?y' ,'http://a/b/c/d;p?y' )
586
+ self .checkJoin (RFC1808_BASE ,';' ,'http://a/b/c/;' )
587
+ self .checkJoin (RFC1808_BASE ,';?y' ,'http://a/b/c/;?y' )
588
+ self .checkJoin (RFC1808_BASE ,';#z' ,'http://a/b/c/;#z' )
589
+ self .checkJoin (RFC1808_BASE ,';x' ,'http://a/b/c/;x' )
590
+ self .checkJoin (RFC1808_BASE ,'/w' ,'http://a/w' )
591
+ self .checkJoin (RFC1808_BASE ,'//' ,'http://a/b/c/d;p?q#f' )
592
+ self .checkJoin (RFC1808_BASE ,'//#z' ,'http://a/b/c/d;p?q#z' )
593
+ self .checkJoin (RFC1808_BASE ,'//?y' ,'http://a/b/c/d;p?y' )
594
+ self .checkJoin (RFC1808_BASE ,'//;x' ,'http://;x' )
595
+ self .checkJoin (RFC1808_BASE ,'///w' ,'http://a/w' )
596
+ self .checkJoin (RFC1808_BASE ,'//v' ,'http://v' )
597
+ # For backward compatibility with RFC1630, the scheme name is allowed
598
+ # to be present in a relative reference if it is the same as the base
599
+ # URI scheme.
600
+ self .checkJoin (RFC1808_BASE ,'http:' ,'http://a/b/c/d;p?q#f' )
601
+ self .checkJoin (RFC1808_BASE ,'http:#' ,'http://a/b/c/d;p?q#' ,relroundtrip = False )
602
+ self .checkJoin (RFC1808_BASE ,'http:#z' ,'http://a/b/c/d;p?q#z' )
603
+ self .checkJoin (RFC1808_BASE ,'http:?' ,'http://a/b/c/d;p?' ,relroundtrip = False )
604
+ self .checkJoin (RFC1808_BASE ,'http:?#z' ,'http://a/b/c/d;p?#z' ,relroundtrip = False )
605
+ self .checkJoin (RFC1808_BASE ,'http:?y' ,'http://a/b/c/d;p?y' )
606
+ self .checkJoin (RFC1808_BASE ,'http:;' ,'http://a/b/c/;' )
607
+ self .checkJoin (RFC1808_BASE ,'http:;?y' ,'http://a/b/c/;?y' )
608
+ self .checkJoin (RFC1808_BASE ,'http:;#z' ,'http://a/b/c/;#z' )
609
+ self .checkJoin (RFC1808_BASE ,'http:;x' ,'http://a/b/c/;x' )
610
+ self .checkJoin (RFC1808_BASE ,'http:/w' ,'http://a/w' )
611
+ self .checkJoin (RFC1808_BASE ,'http://' ,'http://a/b/c/d;p?q#f' )
612
+ self .checkJoin (RFC1808_BASE ,'http://#z' ,'http://a/b/c/d;p?q#z' )
613
+ self .checkJoin (RFC1808_BASE ,'http://?y' ,'http://a/b/c/d;p?y' )
614
+ self .checkJoin (RFC1808_BASE ,'http://;x' ,'http://;x' )
615
+ self .checkJoin (RFC1808_BASE ,'http:///w' ,'http://a/w' )
616
+ self .checkJoin (RFC1808_BASE ,'http://v' ,'http://v' )
617
+ # Different scheme is not ignored.
618
+ self .checkJoin (RFC1808_BASE ,'https:' ,'https:' ,relroundtrip = False )
619
+ self .checkJoin (RFC1808_BASE ,'https:#' ,'https:#' ,relroundtrip = False )
620
+ self .checkJoin (RFC1808_BASE ,'https:#z' ,'https:#z' ,relroundtrip = False )
621
+ self .checkJoin (RFC1808_BASE ,'https:?' ,'https:?' ,relroundtrip = False )
622
+ self .checkJoin (RFC1808_BASE ,'https:?y' ,'https:?y' ,relroundtrip = False )
623
+ self .checkJoin (RFC1808_BASE ,'https:;' ,'https:;' )
624
+ self .checkJoin (RFC1808_BASE ,'https:;x' ,'https:;x' )
625
+
582
626
def test_RFC2732 (self ):
583
627
str_cases = [
584
628
('http://Test.python.org:5432/foo/' ,'test.python.org' ,5432 ),
@@ -641,16 +685,31 @@ def test_urldefrag(self):
641
685
('http://python.org/p?q' ,'http://python.org/p?q' ,'' ),
642
686
(RFC1808_BASE ,'http://a/b/c/d;p?q' ,'f' ),
643
687
(RFC2396_BASE ,'http://a/b/c/d;p?q' ,'' ),
688
+ ('http://a/b/c;p?q#f' ,'http://a/b/c;p?q' ,'f' ),
689
+ ('http://a/b/c;p?q#' ,'http://a/b/c;p?q' ,'' ),
690
+ ('http://a/b/c;p?q' ,'http://a/b/c;p?q' ,'' ),
691
+ ('http://a/b/c;p?#f' ,'http://a/b/c;p?' ,'f' ),
692
+ ('http://a/b/c;p#f' ,'http://a/b/c;p' ,'f' ),
693
+ ('http://a/b/c;?q#f' ,'http://a/b/c;?q' ,'f' ),
694
+ ('http://a/b/c?q#f' ,'http://a/b/c?q' ,'f' ),
695
+ ('http:///b/c;p?q#f' ,'http:///b/c;p?q' ,'f' ),
696
+ ('http:b/c;p?q#f' ,'http:b/c;p?q' ,'f' ),
697
+ ('http:;?q#f' ,'http:;?q' ,'f' ),
698
+ ('http:?q#f' ,'http:?q' ,'f' ),
699
+ ('//a/b/c;p?q#f' ,'//a/b/c;p?q' ,'f' ),
700
+ ('://a/b/c;p?q#f' ,'://a/b/c;p?q' ,'f' ),
644
701
]
645
702
def _encode (t ):
646
703
return type (t )(x .encode ('ascii' )for x in t )
647
704
bytes_cases = [_encode (x )for x in str_cases ]
648
705
for url ,defrag ,frag in str_cases + bytes_cases :
649
- result = urllib .parse .urldefrag (url )
650
- self .assertEqual (result .geturl (),url )
651
- self .assertEqual (result , (defrag ,frag ))
652
- self .assertEqual (result .url ,defrag )
653
- self .assertEqual (result .fragment ,frag )
706
+ with self .subTest (url ):
707
+ result = urllib .parse .urldefrag (url )
708
+ hash = '#' if isinstance (url ,str )else b'#'
709
+ self .assertEqual (result .geturl (),url .rstrip (hash ))
710
+ self .assertEqual (result , (defrag ,frag ))
711
+ self .assertEqual (result .url ,defrag )
712
+ self .assertEqual (result .fragment ,frag )
654
713
655
714
def test_urlsplit_scoped_IPv6 (self ):
656
715
p = urllib .parse .urlsplit ('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234' )