Mar 8, 2025 · Mar 8, 2025 · Mar 8, 2025 · Mar 10, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/Lib/glob.py b/Lib/glob.py
 _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
 _no_recurse_symlinks = object()

 def escape_regex_range_including_seps(pat, seps):
    """Escape ranges containing seperators in a path
    """
    pat = list(pat)
    ordinal_seps=set(map(ord, seps))

    insideRange = False
    ds=[]

    buf=''
    idx1=0
    idx2=0
    rangeIncludesSep=False

    for path_idx, path_ch in enumerate(pat):
        if path_idx > 0:
            if path_ch == '[' and pat[path_idx-1] != '\\':
                insideRange = True
                idx1=path_idx
                continue
            if path_ch == ']' and pat[path_idx-1] != '\\':
                insideRange = False
                idx2=path_idx+1

        if insideRange:
            buf+=path_ch
            if path_ch == '-':
                glob_range = list(range(ord(pat[path_idx-1]), ord(pat[path_idx+1])))
                if ordinal_seps.intersection(glob_range):
                    rangeIncludesSep = True

        elif len(buf)>0:
            ds.append([idx1, idx2, rangeIncludesSep])

            buf=''
            idx1=1
            idx2=2
            rangeIncludesSep=False

    for ds_idx, ds_elem in enumerate(ds):
        idx1=ds_elem[0]
        idx2=ds_elem[1]
        rangeIncludesSep=ds_elem[2]
        if rangeIncludesSep:
            pat.insert(idx1, '\\')
            pat.insert(idx2, '\\')

    return ''.join(pat)

 def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Translate a pathname with shell wildcards to a regular expression.
            seps = (os.path.sep, os.path.altsep)
        else:
            seps = os.path.sep


    escaped_seps = ''.join(map(re.escape, seps))
    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
    not_sep = f'[^{escaped_seps}]'
            if part:
                if not include_hidden and part[0] in '*?':
                    results.append(r'(?!\.)')

                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0])

            if idx < last_part_idx:
                results.append(any_sep)

    res = ''.join(results)
    res=escape_regex_range_including_seps(res, seps=seps)
    return fr'(?s:{res})\Z'


diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py
        self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
        self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z')

        self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo\[%-0\]bar)\Z')
        self.assertEqual(fn('foo[U-d]bar'), r'(?s:foo\[U-d\]bar)\Z')


 if __name__ == "__main__":
    unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst
 Glob.translate escapes regex ranges that ecompass path seperator.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -263,6 +263,54 @@ def escape(pathname):
		_dir_open_flags = os.O_RDONLY \| getattr(os, 'O_DIRECTORY', 0)
		_no_recurse_symlinks = object()

		def escape_regex_range_including_seps(pat, seps):
		"""Escape ranges containing seperators in a path
		"""
		pat = list(pat)
		ordinal_seps=set(map(ord, seps))

		insideRange = False
		ds=[]

		buf=''
		idx1=0
		idx2=0
		rangeIncludesSep=False

		for path_idx, path_ch in enumerate(pat):
		if path_idx > 0:
		if path_ch == '[' and pat[path_idx-1] != '\\':
		insideRange = True
		idx1=path_idx
		continue
		if path_ch == ']' and pat[path_idx-1] != '\\':
		insideRange = False
		idx2=path_idx+1

		if insideRange:
		buf+=path_ch
		if path_ch == '-':
		glob_range = list(range(ord(pat[path_idx-1]), ord(pat[path_idx+1])))
		if ordinal_seps.intersection(glob_range):
		rangeIncludesSep = True

		elif len(buf)>0:
		ds.append([idx1, idx2, rangeIncludesSep])

		buf=''
		idx1=1
		idx2=2
		rangeIncludesSep=False

		for ds_idx, ds_elem in enumerate(ds):
		idx1=ds_elem[0]
		idx2=ds_elem[1]
		rangeIncludesSep=ds_elem[2]
		if rangeIncludesSep:
		pat.insert(idx1, '\\')
		pat.insert(idx2, '\\')

		return ''.join(pat)

Copy link Member picnixzMar 12, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Please revert Copy link Member picnixzMar 17, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Please revert.
		def translate(pat, *, recursive=False, include_hidden=False, seps=None):
		"""Translate a pathname with shell wildcards to a regular expression.
Expand All		@@ -282,6 +330,8 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
		seps = (os.path.sep, os.path.altsep)
		else:
		seps = os.path.sep


		escaped_seps = ''.join(map(re.escape, seps))
		any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
		not_sep = f'[^{escaped_seps}]'
Expand DownExpand Up		@@ -312,10 +362,14 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
		if part:
		if not include_hidden and part[0] in '*?':
		results.append(r'(?!\.)')

		results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0])

		if idx < last_part_idx:
		results.append(any_sep)

		res = ''.join(results)
		res=escape_regex_range_including_seps(res, seps=seps)
		return fr'(?s:{res})\Z'


Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -514,6 +514,9 @@ def fn(pat):
		self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
Copy link Member picnixzMar 17, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. More generally, can you upodate`test_translate_matching` and include the examples ofhttps://man7.org/linux/man-pages/man7/glob.7.html so that we have a compliant implementation?
		self.assertEqual(fn('*/'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z')

		self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo\[%-0\]bar)\Z')
Copy link dkaszewsMar 11, 2025• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I'm not sure this is correct. From my understanding of manpages quoted in the issue, a class should be escaped only if it contains a literal path separator, not a range encompassing it. In latter case, we need to just exclude the separator. [%-0]=> (?!/)[%-0][ab/]=> \[ab/\] Edge case to be tested in bash and`glob.glob`: is a range beginning with separator (`[%-/]` or`[/-0]`) the first case or the second one? What about corner case of single element range`[/-/]`? I would say that all three should be escaped since they "contain an explicit`/` character". Copy link dkaszewsMar 11, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Also, does A range containing an explicit '/' character is syntactically incorrect. (POSIX requires that syntactically incorrect patterns are left unchanged.) mean that entire glob should be escaped, or just the part with the separator? I.e, does`[ab][0/][xy]` map to`[ab]\[0/\][xy]` or`\[ab\]\[0/\]\[xy\]`? Copy link Contributor barneygaleMar 11, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Relevant standard seems to be here:https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/V3_chap02.html#tag_18_13_01 Copy link Author dmitrin9Mar 11, 2025• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. (Thus, "[]-]" matches just the two characters ']' and '-', and "[--0]" matches the three characters '-', '.', and '0', since '/' cannot be matched.) This would indicate that a range which includes a '/' character as a non-literal would match that range but exclude the '/' character, at least with my interpretation. I got that from the glob manpage. Copy link dkaszewsMar 11, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. 2.13.3.1 looks to back my interpretation: If path separator appears between brackets, be it a single character or next to a hyphen, escape entire bracket expression. For`'\\' in seps` case, be careful to check it is not an escape but actual`'\\\\'`. Else, if any hyphened range spans a separator, add a negative lookahead. For simplicity, it can also be added for any bracket expression with a hyphen, or any bracket at all - result is the same, just simplifies regex in most cases. All bracket expressions are analyzed separately, so path separator in one does not invalidate and escape all others. Copy link dkaszewsMar 11, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. @dmitya26 I don't have Python on hand, can you just quickly run`glob.glob('a[/-b]c')` on a following tree: \|-- abc`-- a[ `-- -b]c If it returns`[abc]`, then you are correct, if it returns the file in subdir then my interpretation seems to match existing implementation. Copy link Author dmitrin9Mar 11, 2025• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. It returns '[]'. edit: Oh wait I think I might've misread how the directories need to be structured. Copy link Author dmitrin9Mar 11, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. ├── a[ │ └── -b]c └── abc and glob.glob('a[/-b]c') would return ['a[/-b]c'] for me. dkaszews reacted with thumbs up emoji Copy link Author dmitrin9Mar 14, 2025• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Wait so regarding the spec, do you think we should be disallowing only '/' characters, the system's path separator (os.path.sep), or all path separators mentioned like the ones in glob.translate? Copy link dkaszewsMar 15, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Current implementation already extends the spec to all given separators, e.g.`glob.translate('abc?', seps=['/', '\\'])` maps to`'(?s:abc[^/\\\\])\\Z'`.
		self.assertEqual(fn('foo[U-d]bar'), r'(?s:foo\[U-d\]bar)\Z')


		if __name__ == "__main__":
		unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Glob.translate escapes regex ranges that ecompass path seperator.