gh-102856: Python tokenizer implementation for PEP 701 #104323

Merged
pablogsal merged 20 commits into python:main from mgmacias95:python_tokenizer on May 21, 2023
Changes from 1 commit

Commits (20)
008f8e5  First iteration (mgmacias95, Apr 19, 2023)
67a6ad6  Handle escaping { (mgmacias95, Apr 27, 2023)
f58104d  nested expressions (mgmacias95, Apr 29, 2023)
26102cc  Recursive expression tokenization (mgmacias95, May 2, 2023)
a5f4b40  Remove intermediate token created for dev purposes (mgmacias95, May 2, 2023)
598bab4  More improvements (mgmacias95, May 3, 2023)
a0ed816  fix handling of } tokens (mgmacias95, May 7, 2023)
90b4ab1  other tokenizer (pablogsal, May 16, 2023)
63ef1c1  Some progress (pablogsal, May 17, 2023)
6833b1a  Fix more bugs (pablogsal, May 18, 2023)
90da796  Fix more problems (pablogsal, May 18, 2023)
b5ccd94  Use IA to clean code (pablogsal, May 18, 2023)
b1c3b2a  Remove lel (pablogsal, May 18, 2023)
e941f12  Remove whitespace (pablogsal, May 18, 2023)
67a0239  Fix docs (mgmacias95, May 18, 2023)
dcd221f  Moar tests and fix location error (pablogsal, May 19, 2023)
fd8b60a  Some cleanups (pablogsal, May 19, 2023)
f1a5090  pass the vacuum cleaner (pablogsal, May 19, 2023)
7fb58b0  Fix refleaks (mgmacias95, May 20, 2023)
e1b5d35  📜🤖 Added by blurb_it. (blurb-it[bot], May 20, 2023)
Fix more bugs
pablogsal committed May 18, 2023
commit 6833b1aea88632fb9d3c28bf9f617bb4d1faef96
Lib/inspect.py (1 change: 0 additions & 1 deletion)
@@ -2187,7 +2187,6 @@ def _signature_strip_non_python_syntax(signature):
if string == ',':
current_parameter += 1

# if (type == ERRORTOKEN) and (string == '$'):
if (type == OP) and (string == '$'):
assert self_parameter is None
self_parameter = current_parameter
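
Note: the commented-out line removed above is the remnant of the old behaviour. With the previous pure-Python tokenizer the Argument Clinic '$' marker surfaced as ERRORTOKEN; the C-backed tokenizer reports it as a plain OP token, which is what inspect now checks for. A minimal sketch of the new behaviour on 3.12+ (the signature string is only an illustrative example):

    import io
    from tokenize import generate_tokens, tok_name

    # An Argument Clinic-style signature containing the '$' self marker that
    # _signature_strip_non_python_syntax has to recognise.
    sig = "($module, /, path)"
    for tok in generate_tokens(io.StringIO(sig).readline):
        if tok.string == '$':
            print(tok_name[tok.type])  # expected: 'OP' on 3.12+, ERRORTOKEN before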
Lib/test/test_tokenize.py (42 changes: 14 additions & 28 deletions)
@@ -1,9 +1,9 @@
from test import support
from test.support import os_helper
from tokenize import (tokenize, tokenize2, _tokenize, untokenize, NUMBER, NAME, OP,
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
open as tokenize_open, Untokenizer, generate_tokens,
NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT)
NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
from io import BytesIO, StringIO
import unittest
from textwrap import dedent
@@ -46,7 +46,7 @@ def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
# The ENDMARKER and final NEWLINE are omitted.
f = BytesIO(s.encode('utf-8'))
result = stringify_tokens_from_source(tokenize2(f.readline), s)
result = stringify_tokens_from_source(tokenize(f.readline), s)
self.assertEqual(result,
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
expected.rstrip().splitlines())
@@ -1128,33 +1128,16 @@ def readline():
nonlocal first
if not first:
first = True
return line
yield line
else:
return b''
yield b''

# skip the initial encoding token and the end tokens
tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
self.assertEqual(tokens, expected_tokens,
"bytes not decoded with encoding")

def test__tokenize_does_not_decode_with_encoding_none(self):
Review comment (member): This is being removed because it was testing the _tokenize implementation that doesn't exist anymore and is not public.

literal = '"ЉЊЈЁЂ"'
first = False
def readline():
nonlocal first
if not first:
first = True
return literal
else:
return b''

# skip the end tokens
tokens = list(_tokenize(readline, encoding=None))[:-2]
expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
self.assertEqual(tokens, expected_tokens,
"string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):
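
The hunk above also shows the shape of the new private interface: _tokenize no longer takes a readline callable, it consumes an iterator of byte lines (hence readline() being called to produce a generator) and yields TokenInfo objects rather than bare 5-tuples. A rough sketch of that interface, assuming the 3.12 private helper (not a public API and subject to change):

    from tokenize import _tokenize, tok_name  # _tokenize is private in 3.12+

    def lines():
        # The new _tokenize consumes an iterator of byte lines rather than
        # calling a readline() function repeatedly.
        yield '"ЉЊЈЁЂ"'.encode('utf-8')

    for tok in _tokenize(lines(), encoding='utf-8'):
        print(tok_name[tok.type], tok.string, tok.start, tok.end)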

@@ -1412,7 +1395,7 @@ def test_open_error(self):

class TestTokenize(TestCase):

def test_tokenizee(self):
def test_tokenize(self):
import tokenize as tokenize_module
encoding = "utf-8"
encoding_used = None
@@ -1424,7 +1407,10 @@ def mock__tokenize(readline, encoding):
encoding_used = encoding
out = []
while True:
next_line = readline()
try:
next_line = next(readline)
except StopIteration:
return out
if next_line:
out.append(next_line)
continue
@@ -1444,7 +1430,7 @@ def mock_readline():
tokenize_module._tokenize = mock__tokenize
try:
results = tokenize(mock_readline)
self.assertEqual(list(results),
self.assertEqual(list(results)[1:],
[b'first', b'second', b'1', b'2', b'3', b'4'])
finally:
tokenize_module.detect_encoding = orig_detect_encoding
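
The [1:] slice is needed because tokenize() now emits the ENCODING token itself before delegating the remaining lines to _tokenize, which this test replaces with a mock. A small sanity check of that behaviour against the public API (a sketch, not part of the test suite):

    import io
    from tokenize import tokenize, ENCODING

    # tokenize() always yields the ENCODING token first; everything after it
    # comes from the underlying tokenizer.
    first = next(tokenize(io.BytesIO(b"x = 1\n").readline))
    assert first.type == ENCODING and first.string == "utf-8"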
@@ -1740,7 +1726,7 @@ def test_random_files(self):
if support.verbose >= 2:
print('tokenize', testfile)
with open(testfile, 'rb') as f:
with self.subTest(file=testfile):
#with self.subTest(file=testfile):
self.check_roundtrip(f)


Lib/tokenize.py (246 changes: 34 additions & 212 deletions)
@@ -213,6 +213,14 @@ def untokenize(self, iterable):
self.tokens.append(indent)
self.prev_col = len(indent)
startline = False
elif tok_type == FSTRING_MIDDLE:
if '{' in token or '}' in token:
end_line, end_col = end
end = (end_line, end_col + token.count('{') + token.count('}'))
token = re.sub('{', '{{', token)
token = re.sub('}', '}}', token)


self.add_whitespace(start)
self.tokens.append(token)
self.prev_row, self.prev_col = end
@@ -255,6 +263,11 @@ def compat(self, token, iterable):
elif startline and indents:
toks_append(indents[-1])
startline = False
elif toknum == FSTRING_MIDDLE:
if '{' in tokval or '}' in tokval:
tokval = re.sub('{', '{{', tokval)
tokval = re.sub('}', '}}', tokval)

toks_append(tokval)
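
Both hunks above deal with the same issue: FSTRING_MIDDLE tokens carry the literal text with escaped braces already collapsed to single '{' / '}', so untokenize() has to double them again to emit a valid f-string (and, in the full mode, widen the end column by the number of re-inserted characters). A small round-trip sketch on 3.12+; the exact token layout may vary:

    import io
    from tokenize import generate_tokens, untokenize, FSTRING_MIDDLE

    src = "f'literal {{braces}} and {value}'\n"
    toks = list(generate_tokens(io.StringIO(src).readline))

    # FSTRING_MIDDLE token strings contain single braces for the escaped ones,
    # which is why untokenize() doubles them back up.
    for tok in toks:
        if tok.type == FSTRING_MIDDLE:
            print(repr(tok.string))

    print(untokenize(toks))  # expected to reproduce the original f-string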


@@ -404,36 +417,6 @@ def open(filename):
buffer.close()
raise

def tokenize2(readline):
encoding, consumed = detect_encoding(readline)
rl_gen = _itertools.chain(consumed, iter(readline, b""))
if encoding is not None:
if encoding == "utf-8-sig":
# BOM will already have been stripped.
encoding = "utf-8"
yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
yield from _tokenize2(rl_gen, encoding)

def _tokenize2(rl_gen, encoding):
source = b"".join(rl_gen)
token = None
for token in _generate_tokens_from_c_tokenizer(source.decode(encoding), extra_tokens=True):
# TODO: Marta -> clean this up
if 6 < token.type <= 54:
token = token._replace(type=OP)
if token.type in {ASYNC, AWAIT}:
token = token._replace(type=NAME)
if token.type == NEWLINE:
l_start, c_start = token.start
l_end, c_end = token.end
token = token._replace(string='\n', start=(l_start, c_start), end=(l_end, c_end+1))

yield token
if token is not None:
last_line, _ = token.start
yield TokenInfo(ENDMARKER, '', (last_line + 1, 0), (last_line + 1, 0), '')


def tokenize(readline):
"""
The tokenize() generator requires one argument, readline, which
@@ -454,194 +437,33 @@ def tokenize(readline):
which tells you which encoding was used to decode the bytes stream.
"""
encoding, consumed = detect_encoding(readline)
empty = _itertools.repeat(b"")
rl_gen = _itertools.chain(consumed, iter(readline, b""), empty)
return _tokenize(rl_gen.__next__, encoding)


def _tokenize(readline, encoding):
lnum = parenlev = continued = 0
numchars = '0123456789'
contstr, needcont = '', 0
contline = None
indents = [0]

rl_gen = _itertools.chain(consumed, iter(readline, b""))
if encoding is not None:
if encoding == "utf-8-sig":
# BOM will already have been stripped.
encoding = "utf-8"
yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
last_line = b''
line = b''
while True: # loop over lines in stream
try:
# We capture the value of the line variable here because
# readline uses the empty string '' to signal end of input,
# hence `line` itself will always be overwritten at the end
# of this loop.
last_line = line
line = readline()
except StopIteration:
line = b''

if encoding is not None:
line = line.decode(encoding)
lnum += 1
pos, max = 0, len(line)

if contstr: # continued string
if not line:
raise TokenError("EOF in multi-line string", strstart)
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield TokenInfo(STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
yield TokenInfo(ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
contline = None
continue
else:
contstr = contstr + line
contline = contline + line
continue

elif parenlev == 0 and not continued: # new statement
if not line: break
column = 0
while pos < max: # measure leading whitespace
if line[pos] == ' ':
column += 1
elif line[pos] == '\t':
column = (column//tabsize + 1)*tabsize
elif line[pos] == '\f':
column = 0
else:
break
pos += 1
if pos == max:
break

if line[pos] in '#\r\n': # skip comments or blank lines
if line[pos] == '#':
comment_token = line[pos:].rstrip('\r\n')
yield TokenInfo(COMMENT, comment_token,
(lnum, pos), (lnum, pos + len(comment_token)), line)
pos += len(comment_token)

yield TokenInfo(NL, line[pos:],
(lnum, pos), (lnum, len(line)), line)
continue

if column > indents[-1]: # count indents or dedents
indents.append(column)
yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
if column not in indents:
raise IndentationError(
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]

yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
continued = 0

while pos < max:
pseudomatch = _compile(PseudoToken).match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
if start == end:
continue
token, initial = line[start:end], line[start]

if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, epos, line)
elif initial in '\r\n':
if parenlev > 0:
yield TokenInfo(NL, token, spos, epos, line)
else:
yield TokenInfo(NEWLINE, token, spos, epos, line)

elif initial == '#':
assert not token.endswith("\n")
yield TokenInfo(COMMENT, token, spos, epos, line)

elif token in triple_quoted:
endprog = _compile(endpats[token])
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield TokenInfo(STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break

# Check up to the first 3 chars of the token to see if
# they're in the single_quoted set. If so, they start
# a string.
# We're using the first 3, because we're looking for
# "rb'" (for example) at the start of the token. If
# we switch to longer prefixes, this needs to be
# adjusted.
# Note that initial == token[:1].
# Also note that single quote checking must come after
# triple quote checking (above).
elif (initial in single_quoted or
token[:2] in single_quoted or
token[:3] in single_quoted):
if token[-1] == '\n': # continued string
strstart = (lnum, start)
# Again, using the first 3 chars of the
# token. This is looking for the matching end
# regex for the correct type of quote
# character. So it's really looking for
# endpats["'"] or endpats['"'], by trying to
# skip string prefix characters, if any.
endprog = _compile(endpats.get(initial) or
endpats.get(token[1]) or
endpats.get(token[2]))
contstr, needcont = line[start:], 1
contline = line
break
else: # ordinary string
yield TokenInfo(STRING, token, spos, epos, line)

elif initial.isidentifier(): # ordinary name
yield TokenInfo(NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
continued = 1
else:
if initial in '([{':
parenlev += 1
elif initial in ')]}':
parenlev -= 1
yield TokenInfo(OP, token, spos, epos, line)
else:
yield TokenInfo(ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos += 1
yield from _tokenize(rl_gen, encoding)

def _tokenize(rl_gen, encoding):
source = b"".join(rl_gen).decode(encoding)
token = None
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
# TODO: Marta -> clean this up
if 6 < token.type <= 54:
token = token._replace(type=OP)
if token.type in {ASYNC, AWAIT}:
token = token._replace(type=NAME)
if token.type == NEWLINE:
l_start, c_start = token.start
l_end, c_end = token.end
token = token._replace(string='\n', start=(l_start, c_start), end=(l_end, c_end+1))

# Add an implicit NEWLINE if the input doesn't end in one
if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
yield token
if token is not None:
last_line, _ = token.start
yield TokenInfo(ENDMARKER, '', (last_line + 1, 0), (last_line + 1, 0), '')

tokenize = tokenize2

def generate_tokens(readline):
"""Tokenize a source reading Python code as unicode strings.
@@ -658,7 +480,7 @@ def _gen():
if not line:
return
yield line.encode()
return _tokenize2(_gen(), 'utf-8')
return _tokenize(_gen(), 'utf-8')

def main():
import argparse
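
With the old pure-Python implementation removed, tokenize(), generate_tokens() and the private _tokenize become thin wrappers that join the input lines and defer to _generate_tokens_from_c_tokenizer with extra_tokens=True, remapping exact operator types back to OP and ASYNC/AWAIT back to NAME so the output stays compatible with the old module. The user-visible addition is PEP 701 f-string tokenization; a quick sketch of what the stream looks like on 3.12+ (the exact sequence may differ slightly between versions):

    import io
    from tokenize import generate_tokens, tok_name

    # Under PEP 701 an f-string is no longer one opaque STRING token: it is
    # split into FSTRING_START / FSTRING_MIDDLE / FSTRING_END plus ordinary
    # tokens for the embedded expression.
    src = "f'value: {x + 1}'\n"
    for tok in generate_tokens(io.StringIO(src).readline):
        print(tok_name[tok.type], repr(tok.string))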