7070
7171class UrlParseTestCase (unittest .TestCase ):
7272
73- def checkRoundtrips (self ,url ,parsed ,split ):
73+ def checkRoundtrips (self ,url ,parsed ,split ,url2 = None ):
74+ if url2 is None :
75+ url2 = url
7476result = urllib .parse .urlparse (url )
7577self .assertEqual (result ,parsed )
7678t = (result .scheme ,result .netloc ,result .path ,
7779result .params ,result .query ,result .fragment )
7880self .assertEqual (t ,parsed )
7981# put it back together and it should be the same
8082result2 = urllib .parse .urlunparse (result )
81- self .assertEqual (result2 ,url )
83+ self .assertEqual (result2 ,url2 )
8284self .assertEqual (result2 ,result .geturl ())
8385
8486# the result of geturl() is a fixpoint; we can always parse it
@@ -104,7 +106,7 @@ def checkRoundtrips(self, url, parsed, split):
104106result .query ,result .fragment )
105107self .assertEqual (t ,split )
106108result2 = urllib .parse .urlunsplit (result )
107- self .assertEqual (result2 ,url )
109+ self .assertEqual (result2 ,url2 )
108110self .assertEqual (result2 ,result .geturl ())
109111
110112# check the fixpoint property of re-parsing the result of geturl()
@@ -142,9 +144,39 @@ def test_qs(self):
142144
143145def test_roundtrips (self ):
144146str_cases = [
147+ ('path/to/file' ,
148+ ('' ,'' ,'path/to/file' ,'' ,'' ,'' ),
149+ ('' ,'' ,'path/to/file' ,'' ,'' )),
150+ ('/path/to/file' ,
151+ ('' ,'' ,'/path/to/file' ,'' ,'' ,'' ),
152+ ('' ,'' ,'/path/to/file' ,'' ,'' )),
153+ ('//path/to/file' ,
154+ ('' ,'path' ,'/to/file' ,'' ,'' ,'' ),
155+ ('' ,'path' ,'/to/file' ,'' ,'' )),
156+ ('////path/to/file' ,
157+ ('' ,'' ,'//path/to/file' ,'' ,'' ,'' ),
158+ ('' ,'' ,'//path/to/file' ,'' ,'' )),
159+ ('scheme:path/to/file' ,
160+ ('scheme' ,'' ,'path/to/file' ,'' ,'' ,'' ),
161+ ('scheme' ,'' ,'path/to/file' ,'' ,'' )),
162+ ('scheme:/path/to/file' ,
163+ ('scheme' ,'' ,'/path/to/file' ,'' ,'' ,'' ),
164+ ('scheme' ,'' ,'/path/to/file' ,'' ,'' )),
165+ ('scheme://path/to/file' ,
166+ ('scheme' ,'path' ,'/to/file' ,'' ,'' ,'' ),
167+ ('scheme' ,'path' ,'/to/file' ,'' ,'' )),
168+ ('scheme:////path/to/file' ,
169+ ('scheme' ,'' ,'//path/to/file' ,'' ,'' ,'' ),
170+ ('scheme' ,'' ,'//path/to/file' ,'' ,'' )),
145171 ('file:///tmp/junk.txt' ,
146172 ('file' ,'' ,'/tmp/junk.txt' ,'' ,'' ,'' ),
147173 ('file' ,'' ,'/tmp/junk.txt' ,'' ,'' )),
174+ ('file:////tmp/junk.txt' ,
175+ ('file' ,'' ,'//tmp/junk.txt' ,'' ,'' ,'' ),
176+ ('file' ,'' ,'//tmp/junk.txt' ,'' ,'' )),
177+ ('file://///tmp/junk.txt' ,
178+ ('file' ,'' ,'///tmp/junk.txt' ,'' ,'' ,'' ),
179+ ('file' ,'' ,'///tmp/junk.txt' ,'' ,'' )),
148180 ('imap://mail.python.org/mbox1' ,
149181 ('imap' ,'mail.python.org' ,'/mbox1' ,'' ,'' ,'' ),
150182 ('imap' ,'mail.python.org' ,'/mbox1' ,'' ,'' )),
@@ -175,6 +207,38 @@ def _encode(t):
175207for url ,parsed ,split in str_cases + bytes_cases :
176208self .checkRoundtrips (url ,parsed ,split )
177209
def test_roundtrips_normalization(self):
    # Round trips whose unparsed form is expected to differ from the
    # input spelling.  Each case is a 4-tuple:
    #   (url, url2, parsed, split)
    # where `url` is the input, `url2` is the string that putting the
    # parsed result back together is expected to produce, `parsed` is
    # the expected urlparse() 6-tuple and `split` is the expected
    # urlsplit() 5-tuple.
    str_cases = [
        # Empty authority: the cases expect the '//' to be dropped when
        # the URL is put back together.
        ('///path/to/file',
         '/path/to/file',
         ('', '', '/path/to/file', '', '', ''),
         ('', '', '/path/to/file', '', '')),
        ('scheme:///path/to/file',
         'scheme:/path/to/file',
         ('scheme', '', '/path/to/file', '', '', ''),
         ('scheme', '', '/path/to/file', '', '')),
        # file/http/https with no '//' in the input: the cases expect an
        # empty '//' authority marker to appear in the unparsed URL.
        ('file:/tmp/junk.txt',
         'file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('http:/tmp/junk.txt',
         'http:///tmp/junk.txt',
         ('http', '', '/tmp/junk.txt', '', '', ''),
         ('http', '', '/tmp/junk.txt', '', '')),
        ('https:/tmp/junk.txt',
         'https:///tmp/junk.txt',
         ('https', '', '/tmp/junk.txt', '', '', ''),
         ('https', '', '/tmp/junk.txt', '', '')),
    ]

    def _encode(t):
        # Build the bytes counterpart of a str case; the two expected
        # component sequences stay plain tuples, as in the str cases.
        return (t[0].encode('ascii'),
                t[1].encode('ascii'),
                tuple(x.encode('ascii') for x in t[2]),
                tuple(x.encode('ascii') for x in t[3]))

    bytes_cases = [_encode(x) for x in str_cases]
    for url, url2, parsed, split in str_cases + bytes_cases:
        # One sub-test per case: a failure names the offending URL and
        # does not prevent the remaining cases from being checked.
        with self.subTest(url=url):
            self.checkRoundtrips(url, parsed, split, url2)
241+
178242def test_http_roundtrips (self ):
179243# urllib.parse.urlsplit treats 'http:' as an optimized special case,
180244# so we test both 'http:' and 'https:' in all the following.