Uh oh!
There was an error while loading.Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork32k
gh-84013: normalize directory contents during import#113447
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
base:main
Are you sure you want to change the base?
Changes fromall commits
b7ba389
7f4b01b
6ea9497
c216e08
3f16ae1
80b6eec
b9e8927
7d2d2bf
18d99ae
985e50a
c13edc8
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1345,8 +1345,9 @@ def __init__(self, path, *loader_details): | ||
else: | ||
self.path = _path_abspath(path) | ||
self._path_mtime = -1 | ||
self._path_cache = {} | ||
self._relaxed_path_cache = {} | ||
self._cache_is_normalized = False | ||
def invalidate_caches(self): | ||
"""Invalidate the directory mtime.""" | ||
@@ -1372,15 +1373,21 @@ def find_spec(self, fullname, target=None): | ||
self._fill_cache() | ||
self._path_mtime = mtime | ||
# tail_module keeps the original casing, for __file__ and friends | ||
if not tail_module.isascii() and not self._cache_is_normalized: | ||
self._normalize_cache() | ||
Comment on lines +1376 to +1377 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. For the sake of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Regardless, we should probably have some test cases that use one of those functions rather than syntax. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. I changed the code to keep the unnormalised name around, we need the unnormalised name to access the filesystem and need the normalised name to compare to I agree that there should be more test cases, I started out with just the minimal test from the issue to see if I can get this to work without bootstrapping issues and get some feedback. I'll definitely add more tests when this approach is seen as acceptable by@brettcannon (as the importlib expert). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. If the cache always keeps the raw name then can we drop | ||
if _relax_case(): | ||
cache = self._relaxed_path_cache | ||
cache_module = tail_module.lower() | ||
else: | ||
cache = self._path_cache | ||
cache_module = tail_module | ||
# Check if the module is the name of a directory (and thus a package). | ||
try: | ||
cache_path = cache[cache_module] | ||
except KeyError: | ||
pass | ||
else: | ||
base_path = _path_join(self.path, cache_path) | ||
for suffix, loader_class in self._loaders: | ||
init_filename = '__init__' + suffix | ||
full_path = _path_join(base_path, init_filename) | ||
@@ -1392,15 +1399,21 @@ def find_spec(self, fullname, target=None): | ||
is_namespace = _path_isdir(base_path) | ||
# Check for a file w/ a proper suffix exists. | ||
for suffix, loader_class in self._loaders: | ||
# XXX: Why is ValueError caught here? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. This change worries me a little because I don't understand why the original code catches Removing this like I did here does not result in test failures. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. UnicodeEncodeError if the name is not compatible with the filesystem encoding, ValueError for embedded null characters? | ||
#try: | ||
# full_path = _path_join(self.path, tail_module + suffix) | ||
#except ValueError: | ||
# return None | ||
_bootstrap._verbose_message('trying {}{} in {}', cache_module, suffix, self.path, verbosity=2) | ||
try: | ||
cache_path = cache[cache_module + suffix] | ||
except KeyError: | ||
pass | ||
else: | ||
full_path = _path_join(self.path, cache_path) | ||
if _path_isfile(full_path): | ||
return self._get_spec(loader_class, fullname, full_path, None, target) | ||
if is_namespace: | ||
_bootstrap._verbose_message('possible namespace for {}', base_path) | ||
spec = _bootstrap.ModuleSpec(fullname, None) | ||
@@ -1420,24 +1433,35 @@ def _fill_cache(self): | ||
# We store two cached versions, to handle runtime changes of the | ||
# PYTHONCASEOK environment variable. | ||
if not sys.platform.startswith('win'): | ||
self._path_cache ={ p: p for p incontents } | ||
else: | ||
# Windows users can import modules with case-insensitive file | ||
# suffixes (for legacy reasons). Make the suffix lowercase here | ||
# so it's done once instead of for every import. This is safe as | ||
# the specified suffixes to check against are always specified in a | ||
# case-sensitive manner. | ||
lower_suffix_contents ={} | ||
for item in contents: | ||
name, dot, suffix = item.partition('.') | ||
if dot: | ||
new_name = f'{name}.{suffix.lower()}' | ||
else: | ||
new_name = name | ||
lower_suffix_contents[new_name] = item | ||
self._path_cache = lower_suffix_contents | ||
if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): | ||
self._relaxed_path_cache = {fn.lower(): fn for fn in contents} | ||
self._cache_is_normalized = False | ||
def _normalize_cache(self): | ||
"""Normalize all entries in the caches to NFKC.""" | ||
from unicodedata import normalize | ||
self._path_cache = { normalize('NFKC', p): p for p in self._path_cache } | ||
self._relaxed_path_cache = { normalize('NFKC', p): p for p in self._relaxed_path_cache } | ||
self._cache_is_normalized = True | ||
@classmethod | ||
def path_hook(cls, *loader_details): | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
It is now possible to import modules from the filesystem regardless of how | ||
the name is normalized in the filesystem. This in particular affects | ||
importing modules with a name that contains accented latin characters. | ||
Comment on lines +1 to +3 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. This probably also needs a What's New entry, lets worry about that when the PR is closer to acceptance to avoid creating merge conflicts. |
Uh oh!
There was an error while loading.Please reload this page.