103103
104104class UrlParseTestCase (unittest .TestCase ):
105105
106- def checkRoundtrips (self ,url ,parsed ,split ):
106+ def checkRoundtrips (self ,url ,parsed ,split ,url2 = None ):
107+ if url2 is None :
108+ url2 = url
107109result = urllib .parse .urlparse (url )
108110self .assertSequenceEqual (result ,parsed )
109111t = (result .scheme ,result .netloc ,result .path ,
110112result .params ,result .query ,result .fragment )
111113self .assertSequenceEqual (t ,parsed )
112114# put it back together and it should be the same
113115result2 = urllib .parse .urlunparse (result )
114- self .assertSequenceEqual (result2 ,url )
116+ self .assertSequenceEqual (result2 ,url2 )
115117self .assertSequenceEqual (result2 ,result .geturl ())
116118
117119# the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
137139result .query ,result .fragment )
138140self .assertSequenceEqual (t ,split )
139141result2 = urllib .parse .urlunsplit (result )
140- self .assertSequenceEqual (result2 ,url )
142+ self .assertSequenceEqual (result2 ,url2 )
141143self .assertSequenceEqual (result2 ,result .geturl ())
142144
143145# check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ def test_qs(self):
175177
176178def test_roundtrips (self ):
177179str_cases = [
180+ ('path/to/file' ,
181+ ('' ,'' ,'path/to/file' ,'' ,'' ,'' ),
182+ ('' ,'' ,'path/to/file' ,'' ,'' )),
183+ ('/path/to/file' ,
184+ ('' ,'' ,'/path/to/file' ,'' ,'' ,'' ),
185+ ('' ,'' ,'/path/to/file' ,'' ,'' )),
186+ ('//path/to/file' ,
187+ ('' ,'path' ,'/to/file' ,'' ,'' ,'' ),
188+ ('' ,'path' ,'/to/file' ,'' ,'' )),
189+ ('////path/to/file' ,
190+ ('' ,'' ,'//path/to/file' ,'' ,'' ,'' ),
191+ ('' ,'' ,'//path/to/file' ,'' ,'' )),
192+ ('scheme:path/to/file' ,
193+ ('scheme' ,'' ,'path/to/file' ,'' ,'' ,'' ),
194+ ('scheme' ,'' ,'path/to/file' ,'' ,'' )),
195+ ('scheme:/path/to/file' ,
196+ ('scheme' ,'' ,'/path/to/file' ,'' ,'' ,'' ),
197+ ('scheme' ,'' ,'/path/to/file' ,'' ,'' )),
198+ ('scheme://path/to/file' ,
199+ ('scheme' ,'path' ,'/to/file' ,'' ,'' ,'' ),
200+ ('scheme' ,'path' ,'/to/file' ,'' ,'' )),
201+ ('scheme:////path/to/file' ,
202+ ('scheme' ,'' ,'//path/to/file' ,'' ,'' ,'' ),
203+ ('scheme' ,'' ,'//path/to/file' ,'' ,'' )),
178204 ('file:///tmp/junk.txt' ,
179205 ('file' ,'' ,'/tmp/junk.txt' ,'' ,'' ,'' ),
180206 ('file' ,'' ,'/tmp/junk.txt' ,'' ,'' )),
207+ ('file:////tmp/junk.txt' ,
208+ ('file' ,'' ,'//tmp/junk.txt' ,'' ,'' ,'' ),
209+ ('file' ,'' ,'//tmp/junk.txt' ,'' ,'' )),
210+ ('file://///tmp/junk.txt' ,
211+ ('file' ,'' ,'///tmp/junk.txt' ,'' ,'' ,'' ),
212+ ('file' ,'' ,'///tmp/junk.txt' ,'' ,'' )),
181213 ('imap://mail.python.org/mbox1' ,
182214 ('imap' ,'mail.python.org' ,'/mbox1' ,'' ,'' ,'' ),
183215 ('imap' ,'mail.python.org' ,'/mbox1' ,'' ,'' )),
@@ -213,6 +245,38 @@ def _encode(t):
213245for url ,parsed ,split in str_cases + bytes_cases :
214246self .checkRoundtrips (url ,parsed ,split )
215247
def test_roundtrips_normalization(self):
    """Round-trip URLs whose reassembled form differs from the input.

    Each case is ``(url, url2, parsed, split)``: ``url`` is handed to
    ``checkRoundtrips`` together with the expected ``parsed`` and
    ``split`` tuples, and ``url2`` is the string the URL is expected to
    be put back together as.  Every case is run once as ``str`` and once
    more ASCII-encoded as ``bytes``.
    """
    text_cases = [
        ('///path/to/file',
         '/path/to/file',
         ('', '', '/path/to/file', '', '', ''),
         ('', '', '/path/to/file', '', '')),
        ('scheme:///path/to/file',
         'scheme:/path/to/file',
         ('scheme', '', '/path/to/file', '', '', ''),
         ('scheme', '', '/path/to/file', '', '')),
        ('file:/tmp/junk.txt',
         'file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('http:/tmp/junk.txt',
         'http:///tmp/junk.txt',
         ('http', '', '/tmp/junk.txt', '', '', ''),
         ('http', '', '/tmp/junk.txt', '', '')),
        ('https:/tmp/junk.txt',
         'https:///tmp/junk.txt',
         ('https', '', '/tmp/junk.txt', '', '', ''),
         ('https', '', '/tmp/junk.txt', '', '')),
    ]

    def as_bytes(case):
        # ASCII-encode both URL strings and every component of the
        # expected parse/split tuples.
        source, normalized, parsed_parts, split_parts = case
        return (source.encode('ascii'),
                normalized.encode('ascii'),
                tuple(part.encode('ascii') for part in parsed_parts),
                tuple(part.encode('ascii') for part in split_parts))

    encoded_cases = list(map(as_bytes, text_cases))
    for url, url2, parsed, split in text_cases + encoded_cases:
        self.checkRoundtrips(url, parsed, split, url2)

279+
216280def test_http_roundtrips (self ):
217281# urllib.parse.urlsplit treats 'http:' as an optimized special case,
218282# so we test both 'http:' and 'https:' in all the following.