Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 34k
gh-74902: add unicode grapheme cluster break algorithm #2673
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Changes from all commits
b79f969 c9a4211 7f56b78 a47de54 c152171 b103be7 c9848e2 2dee91e a5b3c10 ba1e9b7 ae06154 1965ed6 164d19b File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -848,5 +848,52 @@ class MyStr(str): | ||
| self.assertIs(type(normalize(form, MyStr(input_str))), str) | ||
| class GraphemeBreakTest(unittest.TestCase): | ||
| @staticmethod | ||
| def check_version(testfile): | ||
| hdr = testfile.readline() | ||
| return unicodedata.unidata_version in hdr | ||
Member: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does the file header look like? With string-containment tests, I worry about things like … [rest of this comment is missing from the page capture]. Member: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have the same check for normalization tests. | ||
@requires_resource('network')
def test_grapheme_break(self):
    """Run the GraphemeBreakTest.txt data for this build's Unicode version.

    The data file is obtained through open_urlresource(), validated with
    check_version(). If it cannot be obtained the test is skipped rather
    than failed; otherwise every line is handed to
    run_grapheme_break_tests().
    """
    version = unicodedata.unidata_version
    data_path = "auxiliary/GraphemeBreakTest.txt"
    data_url = f"https://www.unicode.org/Public/{version}/ucd/{data_path}"

    # Fetch first so that download problems surface before any checking.
    try:
        data_file = open_urlresource(data_url, encoding="utf-8",
                                     check=self.check_version)
    except PermissionError:
        self.skipTest(f"Permission error when downloading {data_url} "
                      "into the test data directory")
    except (OSError, HTTPException) as exc:
        self.skipTest(f"Failed to download {data_url}: {exc}")
    else:
        with data_file:
            self.run_grapheme_break_tests(data_file, unicodedata)
def run_grapheme_break_tests(self, testdata, ucd):
    """Check ``ucd.iter_graphemes()`` against GraphemeBreakTest-style lines.

    For every line, the text before the first ``#`` is the test case and
    the text after it is used as the failure message. A test case is a
    sequence of hexadecimal code points separated by ``÷`` (break) and
    ``×`` (no break) marks. The expected clusters are rebuilt from those
    marks and compared, in one subtest per line, with
    ``list(ucd.iter_graphemes(text))``.

    testdata: iterable of text lines.
    ucd: object providing ``iter_graphemes`` (the caller in this file
        passes the ``unicodedata`` module).
    """
    for raw_line in testdata:
        line, _, comment = raw_line.partition('#')
        line = line.strip()
        if not line:
            # Blank line or comment-only line: nothing to test.
            continue
        comment = comment.strip()

        chunks = []
        # '×' only joins code points inside one cluster, so it is turned
        # into whitespace; each '÷' opens a new (initially empty) cluster.
        for field in line.replace('×', ' ').split():
            if field == '÷':
                chunks.append('')
            else:
                chunks[-1] += chr(int(field, 16))
        # The final '÷' opens a cluster that must stay empty; drop it.
        self.assertEqual(chunks.pop(), '', line)
        with self.subTest(line):
            # Use the module under test that was passed in, not the global.
            result = list(ucd.iter_graphemes(''.join(chunks)))
            self.assertEqual(result, chunks, comment)
# Allow running this test file directly as a script.
if __name__ == "__main__":
    unittest.main()
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.