3535Union ,
3636cast ,
3737)
38- from urllib .parse import urljoin ,urlparse
38+ from urllib .parse import ParseResult , urljoin ,urlparse , urlunparse
3939
4040from playwright ._impl ._api_structures import NameValue
4141from playwright ._impl ._errors import (
@@ -210,8 +210,12 @@ def map_token(original: str, replacement: str) -> str:
210210# Handle special case of http*://, note that the new schema has to be
211211# a web schema so that slashes are properly inserted after domain.
212212if index == 0 and token .endswith (":" ):
213- # Using a simple replacement for the scheme part
214- processed_parts .append (map_token (token ,"http:" ))
213+ # Replace any pattern with http:
214+ if "*" in token or "{" in token :
215+ processed_parts .append (map_token (token ,"http:" ))
216+ else :
217+ # Preserve an explicit scheme as-is, since it may affect trailing slashes after the domain.
218+ processed_parts .append (token )
215219continue
216220question_index = token .find ("?" )
217221if question_index == - 1 :
@@ -222,55 +226,49 @@ def map_token(original: str, replacement: str) -> str:
222226processed_parts .append (new_prefix + new_suffix )
223227
224228relative_path = "/" .join (processed_parts )
225- resolved_url ,case_insensitive_part = resolve_base_url (base_url ,relative_path )
229+ resolved ,case_insensitive_part = resolve_base_url (base_url ,relative_path )
226230
227- for replacement ,original in token_map .items ():
228- normalize = case_insensitive_part and replacement in case_insensitive_part
229- resolved_url = resolved_url .replace (
230- replacement ,original .lower ()if normalize else original ,1
231+ for token ,original in token_map .items ():
232+ normalize = case_insensitive_part and token in case_insensitive_part
233+ resolved = resolved .replace (
234+ token ,original .lower ()if normalize else original ,1
231235 )
232236
233- return ensure_trailing_slash ( resolved_url )
237+ return resolved
234238
235239
def resolve_base_url(
    base_url: Optional[str], given_url: str
) -> Tuple[str, Optional[str]]:
    """Resolve ``given_url`` against ``base_url``.

    Returns a ``(resolved, case_insensitive_prefix)`` tuple. ``resolved`` is
    the joined URL after it has been passed through ``nodelike_urlparse`` and
    serialized again with ``urlunparse``. ``case_insensitive_prefix`` is the
    ``scheme://hostname[:port]`` portion of that URL.

    If anything raises while joining or parsing, the input is returned
    unchanged as ``(given_url, None)``.
    """
    try:
        url = nodelike_urlparse(
            urljoin(base_url if base_url is not None else "", given_url)
        )
        resolved = urlunparse(url)
        # Schema and domain are case-insensitive.
        # url.netloc can't be used here because it includes userinfo
        # (username:password), so the prefix is rebuilt from hostname and port.
        hostname_port = url.hostname or ""
        if url.port:
            hostname_port += f":{url.port}"
        case_insensitive_prefix = f"{url.scheme}://{hostname_port}"
        return resolved, case_insensitive_prefix
    except Exception:
        # Deliberate best-effort: fall back to the raw input on any failure.
        return given_url, None
252258
253259
254- # In Node.js, new URL('http://localhost') returns 'http://localhost/'.
255- # To ensure the same url matching behavior, do the same.
256- def ensure_trailing_slash (url :str )-> str :
257- split = url .split ("://" ,maxsplit = 1 )
258- if len (split )== 2 :
259- # URL parser doesn't like strange/unknown schemes, so we replace it for parsing, then put it back
260- parsable_url = "http://" + split [1 ]
261- else :
262- # Given current rules, this should never happen _and_ still be a valid matcher. We require the protocol to be part of the match,
263- # so either the user is using a glob that starts with "*" (and none of this code is running), or the user actually has `something://` in `match`
264- parsable_url = url
265- parsed = urlparse (parsable_url ,allow_fragments = True )
266- if len (split )== 2 :
267- # Replace the scheme that we removed earlier
268- parsed = parsed ._replace (scheme = split [0 ])
269- if parsed .path == "" :
270- parsed = parsed ._replace (path = "/" )
271- url = parsed .geturl ()
272-
273- return url
def nodelike_urlparse(url: str) -> ParseResult:
    """Parse ``url`` with ``urlparse``, patching the path of special URLs.

    For URLs whose scheme is one of the WHATWG "special" schemes
    (https://url.spec.whatwg.org/#special-scheme), an empty path is replaced
    with ``"/"``. All other parse results are returned as produced by
    ``urlparse``.
    """
    parsed = urlparse(url, allow_fragments=True)

    # Special urls have a list path, and list paths are serialized as described
    # in https://url.spec.whatwg.org/#url-path-serializer. urllib diverges from
    # that for an empty path, so it is patched here.
    special_schemes = ("http", "https", "ws", "wss", "ftp", "file")
    if parsed.scheme in special_schemes and parsed.path == "":
        parsed = parsed._replace(path="/")

    return parsed
274272
275273
276274class HarLookupResult (TypedDict ,total = False ):