Nov 29, 2022 · Aug 13, 2018 · Nov 27, 2022 · Nov 27, 2022 · Nov 24, 2022 · Nov 27, 2022
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py

 NULL = None

 class Str(str):
     """Plain ``str`` subclass with no added behavior.

     Lets tests pass an object that is a ``str`` instance but not an
     exact ``str``.
     """


 class CAPITest(unittest.TestCase):

    @support.cpython_only
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
                  'a\ud800b\udfffc', '\ud834\udd1e']:
            self.assertEqual(fromobject(s), s)
            o = Str(s)
            s2 = fromobject(o)
            self.assertEqual(s2, s)
            self.assertIs(type(s2), str)
            self.assertIsNot(s2, s)

        self.assertRaises(TypeError, fromobject, b'abc')
        self.assertRaises(TypeError, fromobject, [])
        self.assertRaises(ValueError, split, 'a|b|c|d', '')
        self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
        self.assertRaises(TypeError, split, [], '|')
        # split(NULL, '|')
        # CRASHES split(NULL, '|')

    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
        self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
        self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
        self.assertRaises(TypeError, rsplit, [], '|')
        # rsplit(NULL, '|')
        # CRASHES rsplit(NULL, '|')

    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')

        self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
        self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
        self.assertEqual(translate('abc', {}), 'abc')
        self.assertEqual(translate('abc', []), 'abc')
        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
        self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
        self.assertRaises(TypeError, translate, 'abc', 123)
        self.assertRaises(TypeError, translate, 'abc', NULL)
        self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
        # CRASHES translate(NULL, [])

    @support.cpython_only
        """Test PyUnicode_Join()"""
        from _testcapi import unicode_join as join
        self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
        self.assertEqual(join('|', ['a', '', 'c']), 'a||c')
        self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
        self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
        self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
        """Test PyUnicode_Tailmatch()"""
        from _testcapi import unicode_tailmatch as tailmatch

        #for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
            #for i, ch in enumerate(str):
                #self.assertEqual(tailmatch(str, ch, 0, len(str), 1), i)
                #self.assertEqual(tailmatch(str, ch, 0, len(str), -1), i)

        str = 'ababahalamaha'
        self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
        self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_format(self):
        """Test PyUnicode_Contains()"""
        """Test PyUnicode_Format()"""
        from _testcapi import unicode_format as format

        self.assertEqual(format('x=%d!', 42), 'x=42!')
        self.assertEqual(isidentifier("["), 0)
        self.assertEqual(isidentifier("©"), 0)
        self.assertEqual(isidentifier("0"), 0)
        self.assertEqual(isidentifier("32M"), 0)

        # CRASHES isidentifier(b"a")
        # CRASHES isidentifier([])
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,6 +11,10 @@

		NULL = None

		class Str(str):
		pass


		class CAPITest(unittest.TestCase):

		@support.cpython_only
Expand All		@@ -22,6 +26,11 @@ def test_fromobject(self):
		for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
		'a\ud800b\udfffc', '\ud834\udd1e']:
		self.assertEqual(fromobject(s), s)
		o = Str(s)
		s2 = fromobject(o)
		self.assertEqual(s2, s)
		self.assertIs(type(s2), str)
		self.assertIsNot(s2, s)

		self.assertRaises(TypeError, fromobject, b'abc')
		self.assertRaises(TypeError, fromobject, [])
Expand DownExpand Up		@@ -438,7 +447,7 @@ def test_split(self):
		self.assertRaises(ValueError, split, 'a\|b\|c\|d', '')
		self.assertRaises(TypeError, split, 'a\|b\|c\|d', ord('\|'))
		self.assertRaises(TypeError, split, [], '\|')
		# split(NULL, '\|')
		# CRASHES split(NULL, '\|')

		@support.cpython_only
		@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand All		@@ -462,7 +471,7 @@ def test_rsplit(self):
		self.assertRaises(ValueError, rsplit, 'a\|b\|c\|d', '')
		self.assertRaises(TypeError, rsplit, 'a\|b\|c\|d', ord('\|'))
		self.assertRaises(TypeError, rsplit, [], '\|')
		# rsplit(NULL, '\|')
		# CRASHES rsplit(NULL, '\|')

		@support.cpython_only
		@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand DownExpand Up		@@ -530,6 +539,7 @@ def test_translate(self):

		self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
		self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
		self.assertEqual(translate('abc', {}), 'abc')
		self.assertEqual(translate('abc', []), 'abc')
		self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
Copy link Member vstinner Nov 24, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I don't understand. None is supposed to delete the "b" character: https://docs.python.org/dev/library/stdtypes.html#text-sequence-type-str The mapping table must map Unicode ordinal integers to Unicode ordinal integers or None (causing deletion of the character). Is the doc wrong? Copy link Member Author serhiy-storchaka Nov 27, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. The doc is wrong. Copy link Member vstinner Nov 28, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Ah. The surprising part is that str.translate() treats None as "delete": `>>> "abc".translate(str.maketrans({'b': None}))` gives `'ac'`. Well, it would be nice to update the doc (maybe in a separate PR). Copy link Member Author serhiy-storchaka Nov 29, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Because `str.translate` calls `PyUnicode_Translate()` with the error handler `"ignore"`.
		self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
Expand All		@@ -543,6 +553,7 @@ def test_translate(self):
		self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
		self.assertRaises(TypeError, translate, 'abc', 123)
		self.assertRaises(TypeError, translate, 'abc', NULL)
		self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
		# CRASHES translate(NULL, [])

		@support.cpython_only
Expand All		@@ -551,6 +562,7 @@ def test_join(self):
		"""Test PyUnicode_Join()"""
		from _testcapi import unicode_join as join
		self.assertEqual(join('\|', ['a', 'b', 'c']), 'a\|b\|c')
		self.assertEqual(join('\|', ['a', '', 'c']), 'a\|\|c')
		self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
		self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
		self.assertEqual(join('\|', ['а', 'б', 'в']), 'а\|б\|в')
Expand DownExpand Up		@@ -596,11 +608,6 @@ def test_tailmatch(self):
		"""Test PyUnicode_Tailmatch()"""
		from _testcapi import unicode_tailmatch as tailmatch

		#for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
		#for i, ch in enumerate(str):
		#self.assertEqual(tailmatch(str, ch, 0, len(str), 1), i)
		#self.assertEqual(tailmatch(str, ch, 0, len(str), -1), i)

		str = 'ababahalamaha'
		self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
		self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
Expand DownExpand Up		@@ -790,7 +797,7 @@ def test_richcompare(self):
		@support.cpython_only
		@unittest.skipIf(_testcapi is None, 'need _testcapi module')
		def test_format(self):
		"""Test PyUnicode_Contains()"""
		"""Test PyUnicode_Format()"""
		from _testcapi import unicode_format as format

		self.assertEqual(format('x=%d!', 42), 'x=42!')
Expand DownExpand Up		@@ -838,6 +845,7 @@ def test_isidentifier(self):
		self.assertEqual(isidentifier("["), 0)
		self.assertEqual(isidentifier("©"), 0)
		self.assertEqual(isidentifier("0"), 0)
serhiy-storchaka marked this conversation as resolved. Show resolvedHide resolved
		self.assertEqual(isidentifier("32M"), 0)

		# CRASHES isidentifier(b"a")
		# CRASHES isidentifier([])
Expand Down