Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit fc897fc

Browse files
gh-76960: Fix urljoin() and urldefrag() for URIs with empty components (GH-123273)
* urljoin() with relative reference "?" sets empty query and removes fragment.
* Preserve empty components (authority, params, query, fragment) in urljoin().
* Preserve empty components (authority, params, query) in urldefrag().

Also refactor the code and get rid of double _coerce_args() and _coerce_result() calls in urljoin(), urldefrag(), urlparse() and urlunparse().
1 parent e5a567b commit fc897fc

File tree

3 files changed

+140
-52
lines changed

3 files changed

+140
-52
lines changed

‎Lib/test/test_urlparse.py

Lines changed: 73 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -349,18 +349,19 @@ def _encode(t):
349349
split= (scheme,)+split
350350
self.checkRoundtrips(url,parsed,split)
351351

352-
defcheckJoin(self,base,relurl,expected):
352+
defcheckJoin(self,base,relurl,expected,*,relroundtrip=True):
353353
withself.subTest(base=base,relurl=relurl):
354354
self.assertEqual(urllib.parse.urljoin(base,relurl),expected)
355355
baseb=base.encode('ascii')
356356
relurlb=relurl.encode('ascii')
357357
expectedb=expected.encode('ascii')
358358
self.assertEqual(urllib.parse.urljoin(baseb,relurlb),expectedb)
359359

360-
relurl=urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
361-
self.assertEqual(urllib.parse.urljoin(base,relurl),expected)
362-
relurlb=urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
363-
self.assertEqual(urllib.parse.urljoin(baseb,relurlb),expectedb)
360+
ifrelroundtrip:
361+
relurl=urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
362+
self.assertEqual(urllib.parse.urljoin(base,relurl),expected)
363+
relurlb=urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
364+
self.assertEqual(urllib.parse.urljoin(baseb,relurlb),expectedb)
364365

365366
deftest_unparse_parse(self):
366367
str_cases= ['Python','./Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
@@ -526,8 +527,6 @@ def test_RFC3986(self):
526527

527528
deftest_urljoins(self):
528529
self.checkJoin(SIMPLE_BASE,'g:h','g:h')
529-
self.checkJoin(SIMPLE_BASE,'http:g','http://a/b/c/g')
530-
self.checkJoin(SIMPLE_BASE,'http:','http://a/b/c/d')
531530
self.checkJoin(SIMPLE_BASE,'g','http://a/b/c/g')
532531
self.checkJoin(SIMPLE_BASE,'./g','http://a/b/c/g')
533532
self.checkJoin(SIMPLE_BASE,'g/','http://a/b/c/g/')
@@ -548,8 +547,6 @@ def test_urljoins(self):
548547
self.checkJoin(SIMPLE_BASE,'g/./h','http://a/b/c/g/h')
549548
self.checkJoin(SIMPLE_BASE,'g/../h','http://a/b/c/h')
550549
self.checkJoin(SIMPLE_BASE,'http:g','http://a/b/c/g')
551-
self.checkJoin(SIMPLE_BASE,'http:','http://a/b/c/d')
552-
self.checkJoin(SIMPLE_BASE,'http:?y','http://a/b/c/d?y')
553550
self.checkJoin(SIMPLE_BASE,'http:g?y','http://a/b/c/g?y')
554551
self.checkJoin(SIMPLE_BASE,'http:g?y/./x','http://a/b/c/g?y/./x')
555552
self.checkJoin('http:///','..','http:///')
@@ -579,6 +576,53 @@ def test_urljoins(self):
579576
# issue 23703: don't duplicate filename
580577
self.checkJoin('a','b','b')
581578

579+
# Test with empty (but defined) components.
580+
self.checkJoin(RFC1808_BASE,'','http://a/b/c/d;p?q#f')
581+
self.checkJoin(RFC1808_BASE,'#','http://a/b/c/d;p?q#',relroundtrip=False)
582+
self.checkJoin(RFC1808_BASE,'#z','http://a/b/c/d;p?q#z')
583+
self.checkJoin(RFC1808_BASE,'?','http://a/b/c/d;p?',relroundtrip=False)
584+
self.checkJoin(RFC1808_BASE,'?#z','http://a/b/c/d;p?#z',relroundtrip=False)
585+
self.checkJoin(RFC1808_BASE,'?y','http://a/b/c/d;p?y')
586+
self.checkJoin(RFC1808_BASE,';','http://a/b/c/;')
587+
self.checkJoin(RFC1808_BASE,';?y','http://a/b/c/;?y')
588+
self.checkJoin(RFC1808_BASE,';#z','http://a/b/c/;#z')
589+
self.checkJoin(RFC1808_BASE,';x','http://a/b/c/;x')
590+
self.checkJoin(RFC1808_BASE,'/w','http://a/w')
591+
self.checkJoin(RFC1808_BASE,'//','http://a/b/c/d;p?q#f')
592+
self.checkJoin(RFC1808_BASE,'//#z','http://a/b/c/d;p?q#z')
593+
self.checkJoin(RFC1808_BASE,'//?y','http://a/b/c/d;p?y')
594+
self.checkJoin(RFC1808_BASE,'//;x','http://;x')
595+
self.checkJoin(RFC1808_BASE,'///w','http://a/w')
596+
self.checkJoin(RFC1808_BASE,'//v','http://v')
597+
# For backward compatibility with RFC1630, the scheme name is allowed
598+
# to be present in a relative reference if it is the same as the base
599+
# URI scheme.
600+
self.checkJoin(RFC1808_BASE,'http:','http://a/b/c/d;p?q#f')
601+
self.checkJoin(RFC1808_BASE,'http:#','http://a/b/c/d;p?q#',relroundtrip=False)
602+
self.checkJoin(RFC1808_BASE,'http:#z','http://a/b/c/d;p?q#z')
603+
self.checkJoin(RFC1808_BASE,'http:?','http://a/b/c/d;p?',relroundtrip=False)
604+
self.checkJoin(RFC1808_BASE,'http:?#z','http://a/b/c/d;p?#z',relroundtrip=False)
605+
self.checkJoin(RFC1808_BASE,'http:?y','http://a/b/c/d;p?y')
606+
self.checkJoin(RFC1808_BASE,'http:;','http://a/b/c/;')
607+
self.checkJoin(RFC1808_BASE,'http:;?y','http://a/b/c/;?y')
608+
self.checkJoin(RFC1808_BASE,'http:;#z','http://a/b/c/;#z')
609+
self.checkJoin(RFC1808_BASE,'http:;x','http://a/b/c/;x')
610+
self.checkJoin(RFC1808_BASE,'http:/w','http://a/w')
611+
self.checkJoin(RFC1808_BASE,'http://','http://a/b/c/d;p?q#f')
612+
self.checkJoin(RFC1808_BASE,'http://#z','http://a/b/c/d;p?q#z')
613+
self.checkJoin(RFC1808_BASE,'http://?y','http://a/b/c/d;p?y')
614+
self.checkJoin(RFC1808_BASE,'http://;x','http://;x')
615+
self.checkJoin(RFC1808_BASE,'http:///w','http://a/w')
616+
self.checkJoin(RFC1808_BASE,'http://v','http://v')
617+
# Different scheme is not ignored.
618+
self.checkJoin(RFC1808_BASE,'https:','https:',relroundtrip=False)
619+
self.checkJoin(RFC1808_BASE,'https:#','https:#',relroundtrip=False)
620+
self.checkJoin(RFC1808_BASE,'https:#z','https:#z',relroundtrip=False)
621+
self.checkJoin(RFC1808_BASE,'https:?','https:?',relroundtrip=False)
622+
self.checkJoin(RFC1808_BASE,'https:?y','https:?y',relroundtrip=False)
623+
self.checkJoin(RFC1808_BASE,'https:;','https:;')
624+
self.checkJoin(RFC1808_BASE,'https:;x','https:;x')
625+
582626
deftest_RFC2732(self):
583627
str_cases= [
584628
('http://Test.python.org:5432/foo/','test.python.org',5432),
@@ -641,16 +685,31 @@ def test_urldefrag(self):
641685
('http://python.org/p?q','http://python.org/p?q',''),
642686
(RFC1808_BASE,'http://a/b/c/d;p?q','f'),
643687
(RFC2396_BASE,'http://a/b/c/d;p?q',''),
688+
('http://a/b/c;p?q#f','http://a/b/c;p?q','f'),
689+
('http://a/b/c;p?q#','http://a/b/c;p?q',''),
690+
('http://a/b/c;p?q','http://a/b/c;p?q',''),
691+
('http://a/b/c;p?#f','http://a/b/c;p?','f'),
692+
('http://a/b/c;p#f','http://a/b/c;p','f'),
693+
('http://a/b/c;?q#f','http://a/b/c;?q','f'),
694+
('http://a/b/c?q#f','http://a/b/c?q','f'),
695+
('http:///b/c;p?q#f','http:///b/c;p?q','f'),
696+
('http:b/c;p?q#f','http:b/c;p?q','f'),
697+
('http:;?q#f','http:;?q','f'),
698+
('http:?q#f','http:?q','f'),
699+
('//a/b/c;p?q#f','//a/b/c;p?q','f'),
700+
('://a/b/c;p?q#f','://a/b/c;p?q','f'),
644701
]
645702
def_encode(t):
646703
returntype(t)(x.encode('ascii')forxint)
647704
bytes_cases= [_encode(x)forxinstr_cases]
648705
forurl,defrag,fraginstr_cases+bytes_cases:
649-
result=urllib.parse.urldefrag(url)
650-
self.assertEqual(result.geturl(),url)
651-
self.assertEqual(result, (defrag,frag))
652-
self.assertEqual(result.url,defrag)
653-
self.assertEqual(result.fragment,frag)
706+
withself.subTest(url):
707+
result=urllib.parse.urldefrag(url)
708+
hash='#'ifisinstance(url,str)elseb'#'
709+
self.assertEqual(result.geturl(),url.rstrip(hash))
710+
self.assertEqual(result, (defrag,frag))
711+
self.assertEqual(result.url,defrag)
712+
self.assertEqual(result.fragment,frag)
654713

655714
deftest_urlsplit_scoped_IPv6(self):
656715
p=urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')

‎Lib/urllib/parse.py

Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -392,20 +392,23 @@ def urlparse(url, scheme='', allow_fragments=True):
392392
Note that % escapes are not expanded.
393393
"""
394394
url,scheme,_coerce_result=_coerce_args(url,scheme)
395-
splitresult=urlsplit(url,scheme,allow_fragments)
396-
scheme,netloc,url,query,fragment=splitresult
397-
ifschemeinuses_paramsand';'inurl:
398-
url,params=_splitparams(url)
399-
else:
400-
params=''
401-
result=ParseResult(scheme,netloc,url,params,query,fragment)
395+
scheme,netloc,url,params,query,fragment=_urlparse(url,scheme,allow_fragments)
396+
result=ParseResult(schemeor'',netlocor'',url,paramsor'',queryor'',fragmentor'')
402397
return_coerce_result(result)
403398

404-
def_splitparams(url):
399+
def_urlparse(url,scheme=None,allow_fragments=True):
400+
scheme,netloc,url,query,fragment=_urlsplit(url,scheme,allow_fragments)
401+
if (schemeor'')inuses_paramsand';'inurl:
402+
url,params=_splitparams(url,allow_none=True)
403+
else:
404+
params=None
405+
return (scheme,netloc,url,params,query,fragment)
406+
407+
def_splitparams(url,allow_none=False):
405408
if'/'inurl:
406409
i=url.find(';',url.rfind('/'))
407410
ifi<0:
408-
returnurl,''
411+
returnurl,Noneifallow_noneelse''
409412
else:
410413
i=url.find(';')
411414
returnurl[:i],url[i+1:]
@@ -472,17 +475,23 @@ def urlsplit(url, scheme='', allow_fragments=True):
472475
"""
473476

474477
url,scheme,_coerce_result=_coerce_args(url,scheme)
478+
scheme,netloc,url,query,fragment=_urlsplit(url,scheme,allow_fragments)
479+
v=SplitResult(schemeor'',netlocor'',url,queryor'',fragmentor'')
480+
return_coerce_result(v)
481+
482+
def_urlsplit(url,scheme=None,allow_fragments=True):
475483
# Only lstrip url as some applications rely on preserving trailing space.
476484
# (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
477485
url=url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
478-
scheme=scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
479-
480486
forbin_UNSAFE_URL_BYTES_TO_REMOVE:
481487
url=url.replace(b,"")
482-
scheme=scheme.replace(b,"")
488+
ifschemeisnotNone:
489+
scheme=scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
490+
forbin_UNSAFE_URL_BYTES_TO_REMOVE:
491+
scheme=scheme.replace(b,"")
483492

484493
allow_fragments=bool(allow_fragments)
485-
netloc=query=fragment=''
494+
netloc=query=fragment=None
486495
i=url.find(':')
487496
ifi>0andurl[0].isascii()andurl[0].isalpha():
488497
forcinurl[:i]:
@@ -503,8 +512,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
503512
if'?'inurl:
504513
url,query=url.split('?',1)
505514
_checknetloc(netloc)
506-
v=SplitResult(scheme,netloc,url,query,fragment)
507-
return_coerce_result(v)
515+
return (scheme,netloc,url,query,fragment)
508516

509517
defurlunparse(components):
510518
"""Put a parsed URL back together again. This may result in a
@@ -513,9 +521,15 @@ def urlunparse(components):
513521
(the draft states that these are equivalent)."""
514522
scheme,netloc,url,params,query,fragment,_coerce_result= (
515523
_coerce_args(*components))
524+
ifnotnetloc:
525+
ifschemeandschemeinuses_netlocand (noturlorurl[:1]=='/'):
526+
netloc=''
527+
else:
528+
netloc=None
516529
ifparams:
517530
url="%s;%s"% (url,params)
518-
return_coerce_result(urlunsplit((scheme,netloc,url,query,fragment)))
531+
return_coerce_result(_urlunsplit(schemeorNone,netloc,url,
532+
queryorNone,fragmentorNone))
519533

520534
defurlunsplit(components):
521535
"""Combine the elements of a tuple as returned by urlsplit() into a
@@ -525,20 +539,27 @@ def urlunsplit(components):
525539
empty query; the RFC states that these are equivalent)."""
526540
scheme,netloc,url,query,fragment,_coerce_result= (
527541
_coerce_args(*components))
528-
ifnetloc:
542+
ifnotnetloc:
543+
ifschemeandschemeinuses_netlocand (noturlorurl[:1]=='/'):
544+
netloc=''
545+
else:
546+
netloc=None
547+
return_coerce_result(_urlunsplit(schemeorNone,netloc,url,
548+
queryorNone,fragmentorNone))
549+
550+
def_urlunsplit(scheme,netloc,url,query,fragment):
551+
ifnetlocisnotNone:
529552
ifurlandurl[:1]!='/':url='/'+url
530553
url='//'+netloc+url
531554
elifurl[:2]=='//':
532555
url='//'+url
533-
elifschemeandschemeinuses_netlocand (noturlorurl[:1]=='/'):
534-
url='//'+url
535556
ifscheme:
536557
url=scheme+':'+url
537-
ifquery:
558+
ifqueryisnotNone:
538559
url=url+'?'+query
539-
iffragment:
560+
iffragmentisnotNone:
540561
url=url+'#'+fragment
541-
return_coerce_result(url)
562+
returnurl
542563

543564
defurljoin(base,url,allow_fragments=True):
544565
"""Join a base URL and a possibly relative URL to form an absolute
@@ -549,26 +570,29 @@ def urljoin(base, url, allow_fragments=True):
549570
returnbase
550571

551572
base,url,_coerce_result=_coerce_args(base,url)
552-
bscheme,bnetloc,bpath,bparams,bquery,bfragment= \
553-
urlparse(base,'',allow_fragments)
554-
scheme,netloc,path,params,query,fragment= \
555-
urlparse(url,bscheme,allow_fragments)
573+
bscheme,bnetloc,bpath,bquery,bfragment= \
574+
_urlsplit(base,None,allow_fragments)
575+
scheme,netloc,path,query,fragment= \
576+
_urlsplit(url,None,allow_fragments)
556577

578+
ifschemeisNone:
579+
scheme=bscheme
557580
ifscheme!=bschemeorschemenotinuses_relative:
558581
return_coerce_result(url)
559582
ifschemeinuses_netloc:
560583
ifnetloc:
561-
return_coerce_result(urlunparse((scheme,netloc,path,
562-
params,query,fragment)))
584+
return_coerce_result(_urlunsplit(scheme,netloc,path,
585+
query,fragment))
563586
netloc=bnetloc
564587

565-
ifnotpathandnotparams:
588+
ifnotpath:
566589
path=bpath
567-
params=bparams
568-
ifnotquery:
590+
ifqueryisNone:
569591
query=bquery
570-
return_coerce_result(urlunparse((scheme,netloc,path,
571-
params,query,fragment)))
592+
iffragmentisNone:
593+
fragment=bfragment
594+
return_coerce_result(_urlunsplit(scheme,netloc,path,
595+
query,fragment))
572596

573597
base_parts=bpath.split('/')
574598
ifbase_parts[-1]!='':
@@ -605,8 +629,8 @@ def urljoin(base, url, allow_fragments=True):
605629
# then we need to append the trailing '/'
606630
resolved_path.append('')
607631

608-
return_coerce_result(urlunparse((scheme,netloc,'/'.join(
609-
resolved_path)or'/',params,query,fragment)))
632+
return_coerce_result(_urlunsplit(scheme,netloc,'/'.join(
633+
resolved_path)or'/',query,fragment))
610634

611635

612636
defurldefrag(url):
@@ -618,12 +642,12 @@ def urldefrag(url):
618642
"""
619643
url,_coerce_result=_coerce_args(url)
620644
if'#'inurl:
621-
s,n,p,a,q,frag=urlparse(url)
622-
defrag=urlunparse((s,n,p,a,q,''))
645+
s,n,p,q,frag=_urlsplit(url)
646+
defrag=_urlunsplit(s,n,p,q,None)
623647
else:
624648
frag=''
625649
defrag=url
626-
return_coerce_result(DefragResult(defrag,frag))
650+
return_coerce_result(DefragResult(defrag,fragor''))
627651

628652
_hexdig='0123456789ABCDEFabcdef'
629653
_hextobyte=None
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix :func:`urllib.parse.urljoin` and :func:`urllib.parse.urldefrag` for URIs
2+
containing empty components. For example, :func:`!urljoin()` with relative
3+
reference "?" now sets empty query and removes fragment.
4+
Preserve empty components (authority, params, query, fragment) in :func:`!urljoin`.
5+
Preserve empty components (authority, params, query) in :func:`!urldefrag`.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp