11import dataclasses
2- from typing import Iterator ,Pattern ,Protocol
2+ from typing import Iterator ,Optional , Pattern ,Protocol
33
44from libvcs ._internal .dataclasses import SkipDefaultFieldsReprMixin
55
@@ -13,7 +13,7 @@ def __init__(self, url: str):
1313def to_url (self )-> str :
1414 ...
1515
16- def is_valid (self ,url :str )-> bool :
16+ def is_valid (self ,url :str , is_explicit : Optional [ bool ] = None )-> bool :
1717 ...
1818
1919
@@ -28,6 +28,8 @@ class Matcher(SkipDefaultFieldsReprMixin):
2828pattern :Pattern
2929"""Regex pattern"""
3030pattern_defaults :dict = dataclasses .field (default_factory = dict )
31+ """Is the match unambiguous with other version control systems? e.g. git+ prefix"""
32+ is_explicit :bool = False
3133
3234
3335@dataclasses .dataclass (repr = False )
@@ -37,10 +39,11 @@ class MatcherRegistry(SkipDefaultFieldsReprMixin):
3739_matchers :dict [str ,Matcher ]= dataclasses .field (default_factory = dict )
3840
3941def register (self ,cls :Matcher )-> None :
40- """
42+ r """
4143
4244 .. currentmodule:: libvcs.parse.git
4345
46+ >>> from dataclasses import dataclass
4447 >>> from libvcs.parse.git import GitURL, GitBaseURL
4548
4649 :class:`GitBaseURL` - the ``git(1)`` compliant parser - won't accept a pip-style URL:
@@ -56,30 +59,69 @@ def register(self, cls: Matcher) -> None:
5659 But what if you wanted to do ``github:org/repo``?
5760
5861 >>> GitURL.is_valid(url="github:org/repo")
59- False
62+ True
63+
64+ That is accepted, but only because it is caught by git's generic SCP-style regex:
65+
66+ >>> GitURL(url="github:org/repo")
67+ GitURL(url=github:org/repo,
68+ hostname=github,
69+ path=org/repo,
70+ matcher=core-git-scp)
71+
72+ We need something more specific. What do we do?
6073
6174 **Extending matching capability:**
6275
6376 >>> class GitHubPrefix(Matcher):
6477 ... label = 'gh-prefix'
6578 ... description ='Matches prefixes like github:org/repo'
66- ... pattern = r'^github:(?P<path>) '
79+ ... pattern = r'^github:(?P<path>.*)$ '
6780 ... pattern_defaults = {
6881 ... 'hostname': 'github.com',
6982 ... 'scheme': 'https'
7083 ... }
84+ ... # We know it's git, not any other VCS
85+ ... is_explicit = True
7186
72- >>> class GitHubLocation(GitURL):
73- ... matchers = MatcherRegistry = MatcherRegistry(
87+ >>> @dataclasses.dataclass(repr=False)
88+ ... class GitHubURL(GitURL):
89+ ... matchers: MatcherRegistry = MatcherRegistry(
7490 ... _matchers={'github_prefix': GitHubPrefix}
7591 ... )
7692
77- >>> GitHubLocation.is_valid(url='github:vcs-python/libvcs')
93+ >>> GitHubURL.is_valid(url='github:vcs-python/libvcs')
94+ True
95+
96+ >>> GitHubURL.is_valid(url='github:vcs-python/libvcs', is_explicit=True)
7897 True
7998
80- >>> GitHubLocation.is_valid(url='gitlab:vcs-python/libvcs')
99+ Notice how ``pattern_defaults`` neatly fills the values for us.
100+
101+ >>> GitHubURL(url='github:vcs-python/libvcs')
102+ GitHubURL(url=github:vcs-python/libvcs,
103+ scheme=https,
104+ hostname=github.com,
105+ path=vcs-python/libvcs,
106+ matcher=gh-prefix)
107+
108+ >>> GitHubURL(url='github:vcs-python/libvcs').to_url()
109+ 'https://github.com/vcs-python/libvcs'
110+
111+ >>> GitHubURL.is_valid(url='gitlab:vcs-python/libvcs')
81112 False
82113
114+ ``GitHubURL`` sees this as invalid since it only has one matcher,
115+ ``GitHubPrefix``.
116+
117+ >>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
118+ True
119+
120+ Same story: it gets caught by ``git(1)``'s own liberal scp-style URL matcher:
121+
122+ >>> GitURL(url='gitlab:vcs-python/libvcs').matcher
123+ 'core-git-scp'
124+
83125 >>> class GitLabPrefix(Matcher):
84126 ... label = 'gl-prefix'
85127 ... description ='Matches prefixes like gitlab:org/repo'
@@ -92,25 +134,33 @@ def register(self, cls: Matcher) -> None:
92134
93135 Option 1: Create a brand new URL class with its own matcher registry
94136
95- >>> class GitLabLocation(GitURL):
96- ... matchers = MatcherRegistry = MatcherRegistry(
97- ... _matchers={'gitlab_prefix': GitLabPrefix}
98- ... )
137+ >>> @dataclasses.dataclass(repr=False)
138+ ... class GitLabURL(GitURL):
139+ ... matchers: MatcherRegistry = MatcherRegistry(
140+ ... _matchers={'gitlab_prefix': GitLabPrefix}
141+ ... )
99142
100- >>>GitLabLocation .is_valid(url='gitlab:vcs-python/libvcs')
143+ >>>GitLabURL .is_valid(url='gitlab:vcs-python/libvcs')
101144 True
102145
103146 Option 2 (global, everywhere): Add to the global :class:`GitURL`:
104147
105148 >>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
106- False
149+ True
150+
151+ Are we home free, though? Remember our issue with vague matches.
152+
153+ >>> GitURL(url='gitlab:vcs-python/libvcs').matcher
154+ 'core-git-scp'
155+
156+ Register:
107157
108158 >>> GitURL.matchers.register(GitLabPrefix)
109159
110160 >>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
111161 True
112162
113- git URLs + pip-style git URLs:
163+ **Example: git URLs + pip-style git URLs:**
114164
115165 This is already in :class:`GitURL` via :data:`PIP_DEFAULT_MATCHERS`. For the
116166 sake of showing how extensibility works, here is a recreation based on
@@ -120,8 +170,9 @@ def register(self, cls: Matcher) -> None:
120170
121171 >>> from libvcs.parse.git import DEFAULT_MATCHERS, PIP_DEFAULT_MATCHERS
122172
123- >>> class GitURLWithPip(GitBaseURL):
124- ... matchers = MatcherRegistry = MatcherRegistry(
173+ >>> @dataclasses.dataclass(repr=False)
174+ ... class GitURLWithPip(GitBaseURL):
175+ ... matchers: MatcherRegistry = MatcherRegistry(
125176 ... _matchers={m.label: m for m in [*DEFAULT_MATCHERS, *PIP_DEFAULT_MATCHERS]}
126177 ... )
127178