init

2022-03-31 20:20:07 -05:00 · 2022-03-31 20:20:07 -05:00 · 38355d2442
commit 38355d2442
9083 changed files with 1225834 additions and 0 deletions
--- a/.venv/lib/python3.8/site-packages/parso/python/tokenize.py
+++ b/.venv/lib/python3.8/site-packages/parso/python/tokenize.py
@ -0,0 +1,687 @@
+# -*- coding: utf-8 -*-
+"""
+This tokenizer has been copied from the ``tokenize.py`` standard library
+tokenizer. The reason was simple: The standard library tokenizer fails
+if the indentation is not right. To make it possible to do error recovery the
+    tokenizer needed to be rewritten.
+
+Basically this is a stripped down version of the standard library module, so
+you can read the documentation there. Additionally we included some speed and
+memory optimizations here.
+"""
+from __future__ import absolute_import
+
+import sys
+import re
+import itertools as _itertools
+from codecs import BOM_UTF8
+from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
+    Pattern, Set
+
+from parso.python.token import PythonTokenTypes
+from parso.utils import split_lines, PythonVersionInfo, parse_version_string
+
+
+# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
+MAX_UNICODE = '\U0010ffff'
+
+STRING = PythonTokenTypes.STRING
+NAME = PythonTokenTypes.NAME
+NUMBER = PythonTokenTypes.NUMBER
+OP = PythonTokenTypes.OP
+NEWLINE = PythonTokenTypes.NEWLINE
+INDENT = PythonTokenTypes.INDENT
+DEDENT = PythonTokenTypes.DEDENT
+ENDMARKER = PythonTokenTypes.ENDMARKER
+ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
+ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
+FSTRING_START = PythonTokenTypes.FSTRING_START
+FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
+FSTRING_END = PythonTokenTypes.FSTRING_END
+
+
+class TokenCollection(NamedTuple):
+    pseudo_token: Pattern
+    single_quoted: Set[str]
+    triple_quoted: Set[str]
+    endpats: Dict[str, Pattern]
+    whitespace: Pattern
+    fstring_pattern_map: Dict[str, str]
+    always_break_tokens: Tuple[str]
+
+
+BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
+
+_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}
+
+
+def group(*choices, capture=False, **kwargs):
+    assert not kwargs
+
+    start = '('
+    if not capture:
+        start += '?:'
+    return start + '|'.join(choices) + ')'
+
+
+def maybe(*choices):
+    return group(*choices) + '?'
+
+
+# Return the empty string, plus all of the valid string prefixes.
+def _all_string_prefixes(*, include_fstring=False, only_fstring=False):
+    def different_case_versions(prefix):
+        for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
+            yield ''.join(s)
+    # The valid string prefixes. Only contain the lower case versions,
+    #  and don't contain any permuations (include 'fr', but not
+    #  'rf'). The various permutations will be generated.
+    valid_string_prefixes = ['b', 'r', 'u', 'br']
+
+    result = {''}
+    if include_fstring:
+        f = ['f', 'fr']
+        if only_fstring:
+            valid_string_prefixes = f
+            result = set()
+        else:
+            valid_string_prefixes += f
+    elif only_fstring:
+        return set()
+
+    # if we add binary f-strings, add: ['fb', 'fbr']
+    for prefix in valid_string_prefixes:
+        for t in _itertools.permutations(prefix):
+            # create a list with upper and lower versions of each
+            #  character
+            result.update(different_case_versions(t))
+    return result
+
+
+def _compile(expr):
+    return re.compile(expr, re.UNICODE)
+
+
+def _get_token_collection(version_info):
+    try:
+        return _token_collection_cache[tuple(version_info)]
+    except KeyError:
+        _token_collection_cache[tuple(version_info)] = result = \
+            _create_token_collection(version_info)
+        return result
+
+
+unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*'
+fstring_string_single_line = _compile(
+    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name
+    + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+'
+)
+fstring_string_multi_line = _compile(
+    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + r'\}|\\[^N]|[^{}\\])+'
+)
+fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
+fstring_format_spec_multi_line = _compile(r'[^{}]+')
+
+
+def _create_token_collection(version_info):
+    # Note: we use unicode matching for names ("\w") but ascii matching for
+    # number literals.
+    Whitespace = r'[ \f\t]*'
+    whitespace = _compile(Whitespace)
+    Comment = r'#[^\r\n]*'
+    Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
+
+    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+    Binnumber = r'0[bB](?:_?[01])+'
+    Octnumber = r'0[oO](?:_?[0-7])+'
+    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
+    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
+    Floatnumber = group(Pointfloat, Expfloat)
+    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
+    Number = group(Imagnumber, Floatnumber, Intnumber)
+
+    # Note that since _all_string_prefixes includes the empty string,
+    #  StringPrefix can be the empty string (making it optional).
+    possible_prefixes = _all_string_prefixes()
+    StringPrefix = group(*possible_prefixes)
+    StringPrefixWithF = group(*_all_string_prefixes(include_fstring=True))
+    fstring_prefixes = _all_string_prefixes(include_fstring=True, only_fstring=True)
+    FStringStart = group(*fstring_prefixes)
+
+    # Tail end of ' string.
+    Single = r"(?:\\.|[^'\\])*'"
+    # Tail end of " string.
+    Double = r'(?:\\.|[^"\\])*"'
+    # Tail end of ''' string.
+    Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
+    # Tail end of """ string.
+    Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
+    Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
+
+    # Because of leftmost-then-longest match semantics, be sure to put the
+    # longest operators first (e.g., if = came before ==, == would get
+    # recognized as two instances of =).
+    Operator = group(r"\*\*=?", r">>=?", r"<<=?",
+                     r"//=?", r"->",
+                     r"[+\-*/%&@`|^!=<>]=?",
+                     r"~")
+
+    Bracket = '[][(){}]'
+
+    special_args = [r'\.\.\.', r'\r\n?', r'\n', r'[;.,@]']
+    if version_info >= (3, 8):
+        special_args.insert(0, ":=?")
+    else:
+        special_args.insert(0, ":")
+    Special = group(*special_args)
+
+    Funny = group(Operator, Bracket, Special)
+
+    # First (or only) line of ' or " string.
+    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*"
+                    + group("'", r'\\(?:\r\n?|\n)'),
+                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*'
+                    + group('"', r'\\(?:\r\n?|\n)'))
+    pseudo_extra_pool = [Comment, Triple]
+    all_quotes = '"', "'", '"""', "'''"
+    if fstring_prefixes:
+        pseudo_extra_pool.append(FStringStart + group(*all_quotes))
+
+    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
+    PseudoToken = group(Whitespace, capture=True) + \
+        group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
+
+    # For a given string prefix plus quotes, endpats maps it to a regex
+    #  to match the remainder of that string. _prefix can be empty, for
+    #  a normal single or triple quoted string (with no prefix).
+    endpats = {}
+    for _prefix in possible_prefixes:
+        endpats[_prefix + "'"] = _compile(Single)
+        endpats[_prefix + '"'] = _compile(Double)
+        endpats[_prefix + "'''"] = _compile(Single3)
+        endpats[_prefix + '"""'] = _compile(Double3)
+
+    # A set of all of the single and triple quoted string prefixes,
+    #  including the opening quotes.
+    single_quoted = set()
+    triple_quoted = set()
+    fstring_pattern_map = {}
+    for t in possible_prefixes:
+        for quote in '"', "'":
+            single_quoted.add(t + quote)
+
+        for quote in '"""', "'''":
+            triple_quoted.add(t + quote)
+
+    for t in fstring_prefixes:
+        for quote in all_quotes:
+            fstring_pattern_map[t + quote] = quote
+
+    ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
+                           'finally', 'while', 'with', 'return', 'continue',
+                           'break', 'del', 'pass', 'global', 'assert', 'nonlocal')
+    pseudo_token_compiled = _compile(PseudoToken)
+    return TokenCollection(
+        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
+        whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS)
+    )
+
+
+class Token(NamedTuple):
+    type: PythonTokenTypes
+    string: str
+    start_pos: Tuple[int, int]
+    prefix: str
+
+    @property
+    def end_pos(self) -> Tuple[int, int]:
+        lines = split_lines(self.string)
+        if len(lines) > 1:
+            return self.start_pos[0] + len(lines) - 1, 0
+        else:
+            return self.start_pos[0], self.start_pos[1] + len(self.string)
+
+
+class PythonToken(Token):
+    def __repr__(self):
+        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
+                self._replace(type=self.type.name))
+
+
+class FStringNode:
+    def __init__(self, quote):
+        self.quote = quote
+        self.parentheses_count = 0
+        self.previous_lines = ''
+        self.last_string_start_pos = None
+        # In the syntax there can be multiple format_spec's nested:
+        # {x:{y:3}}
+        self.format_spec_count = 0
+
+    def open_parentheses(self, character):
+        self.parentheses_count += 1
+
+    def close_parentheses(self, character):
+        self.parentheses_count -= 1
+        if self.parentheses_count == 0:
+            # No parentheses means that the format spec is also finished.
+            self.format_spec_count = 0
+
+    def allow_multiline(self):
+        return len(self.quote) == 3
+
+    def is_in_expr(self):
+        return self.parentheses_count > self.format_spec_count
+
+    def is_in_format_spec(self):
+        return not self.is_in_expr() and self.format_spec_count
+
+
+def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix):
+    for fstring_stack_index, node in enumerate(fstring_stack):
+        lstripped_string = string.lstrip()
+        len_lstrip = len(string) - len(lstripped_string)
+        if lstripped_string.startswith(node.quote):
+            token = PythonToken(
+                FSTRING_END,
+                node.quote,
+                (line_nr, column + len_lstrip),
+                prefix=additional_prefix+string[:len_lstrip],
+            )
+            additional_prefix = ''
+            assert not node.previous_lines
+            del fstring_stack[fstring_stack_index:]
+            return token, '', len(node.quote) + len_lstrip
+    return None, additional_prefix, 0
+
+
+def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
+    tos = fstring_stack[-1]
+    allow_multiline = tos.allow_multiline()
+    if tos.is_in_format_spec():
+        if allow_multiline:
+            regex = fstring_format_spec_multi_line
+        else:
+            regex = fstring_format_spec_single_line
+    else:
+        if allow_multiline:
+            regex = fstring_string_multi_line
+        else:
+            regex = fstring_string_single_line
+
+    match = regex.match(line, pos)
+    if match is None:
+        return tos.previous_lines, pos
+
+    if not tos.previous_lines:
+        tos.last_string_start_pos = (lnum, pos)
+
+    string = match.group(0)
+    for fstring_stack_node in fstring_stack:
+        end_match = endpats[fstring_stack_node.quote].match(string)
+        if end_match is not None:
+            string = end_match.group(0)[:-len(fstring_stack_node.quote)]
+
+    new_pos = pos
+    new_pos += len(string)
+    # even if allow_multiline is False, we still need to check for trailing
+    # newlines, because a single-line f-string can contain line continuations
+    if string.endswith('\n') or string.endswith('\r'):
+        tos.previous_lines += string
+        string = ''
+    else:
+        string = tos.previous_lines + string
+
+    return string, new_pos
+
+
+def tokenize(
+    code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
+) -> Iterator[PythonToken]:
+    """Generate tokens from a the source code (string)."""
+    lines = split_lines(code, keepends=True)
+    return tokenize_lines(lines, version_info=version_info, start_pos=start_pos)
+
+
+def _print_tokens(func):
+    """
+    A small helper function to help debug the tokenize_lines function.
+    """
+    def wrapper(*args, **kwargs):
+        for token in func(*args, **kwargs):
+            print(token)  # This print is intentional for debugging!
+            yield token
+
+    return wrapper
+
+
+# @_print_tokens
+def tokenize_lines(
+    lines: Iterable[str],
+    *,
+    version_info: PythonVersionInfo,
+    indents: List[int] = None,
+    start_pos: Tuple[int, int] = (1, 0),
+    is_first_token=True,
+) -> Iterator[PythonToken]:
+    """
+    A heavily modified Python standard library tokenizer.
+
+    Additionally to the default information, yields also the prefix of each
+    token. This idea comes from lib2to3. The prefix contains all information
+    that is irrelevant for the parser like newlines in parentheses or comments.
+    """
+    def dedent_if_necessary(start):
+        while start < indents[-1]:
+            if start > indents[-2]:
+                yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
+                indents[-1] = start
+                break
+            indents.pop()
+            yield PythonToken(DEDENT, '', spos, '')
+
+    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
+        fstring_pattern_map, always_break_tokens, = \
+        _get_token_collection(version_info)
+    paren_level = 0  # count parentheses
+    if indents is None:
+        indents = [0]
+    max_ = 0
+    numchars = '0123456789'
+    contstr = ''
+    contline: str
+    contstr_start: Tuple[int, int]
+    endprog: Pattern
+    # We start with a newline. This makes indent at the first position
+    # possible. It's not valid Python, but still better than an INDENT in the
+    # second line (and not in the first). This makes quite a few things in
+    # Jedi's fast parser possible.
+    new_line = True
+    prefix = ''  # Should never be required, but here for safety
+    additional_prefix = ''
+    lnum = start_pos[0] - 1
+    fstring_stack: List[FStringNode] = []
+    for line in lines:  # loop over lines in stream
+        lnum += 1
+        pos = 0
+        max_ = len(line)
+        if is_first_token:
+            if line.startswith(BOM_UTF8_STRING):
+                additional_prefix = BOM_UTF8_STRING
+                line = line[1:]
+                max_ = len(line)
+
+            # Fake that the part before was already parsed.
+            line = '^' * start_pos[1] + line
+            pos = start_pos[1]
+            max_ += start_pos[1]
+
+            is_first_token = False
+
+        if contstr:                                         # continued string
+            endmatch = endprog.match(line)  # noqa: F821
+            if endmatch:
+                pos = endmatch.end(0)
+                yield PythonToken(
+                    STRING, contstr + line[:pos],
+                    contstr_start, prefix)  # noqa: F821
+                contstr = ''
+                contline = ''
+            else:
+                contstr = contstr + line
+                contline = contline + line
+                continue
+
+        while pos < max_:
+            if fstring_stack:
+                tos = fstring_stack[-1]
+                if not tos.is_in_expr():
+                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
+                    if string:
+                        yield PythonToken(
+                            FSTRING_STRING, string,
+                            tos.last_string_start_pos,
+                            # Never has a prefix because it can start anywhere and
+                            # include whitespace.
+                            prefix=''
+                        )
+                        tos.previous_lines = ''
+                        continue
+                    if pos == max_:
+                        break
+
+                rest = line[pos:]
+                fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
+                    fstring_stack,
+                    rest,
+                    lnum,
+                    pos,
+                    additional_prefix,
+                )
+                pos += quote_length
+                if fstring_end_token is not None:
+                    yield fstring_end_token
+                    continue
+
+            # in an f-string, match until the end of the string
+            if fstring_stack:
+                string_line = line
+                for fstring_stack_node in fstring_stack:
+                    quote = fstring_stack_node.quote
+                    end_match = endpats[quote].match(line, pos)
+                    if end_match is not None:
+                        end_match_string = end_match.group(0)
+                        if len(end_match_string) - len(quote) + pos < len(string_line):
+                            string_line = line[:pos] + end_match_string[:-len(quote)]
+                pseudomatch = pseudo_token.match(string_line, pos)
+            else:
+                pseudomatch = pseudo_token.match(line, pos)
+
+            if pseudomatch:
+                prefix = additional_prefix + pseudomatch.group(1)
+                additional_prefix = ''
+                start, pos = pseudomatch.span(2)
+                spos = (lnum, start)
+                token = pseudomatch.group(2)
+                if token == '':
+                    assert prefix
+                    additional_prefix = prefix
+                    # This means that we have a line with whitespace/comments at
+                    # the end, which just results in an endmarker.
+                    break
+                initial = token[0]
+            else:
+                match = whitespace.match(line, pos)
+                initial = line[match.end()]
+                start = match.end()
+                spos = (lnum, start)
+
+            if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
+                new_line = False
+                if paren_level == 0 and not fstring_stack:
+                    indent_start = start
+                    if indent_start > indents[-1]:
+                        yield PythonToken(INDENT, '', spos, '')
+                        indents.append(indent_start)
+                    yield from dedent_if_necessary(indent_start)
+
+            if not pseudomatch:  # scan for tokens
+                match = whitespace.match(line, pos)
+                if new_line and paren_level == 0 and not fstring_stack:
+                    yield from dedent_if_necessary(match.end())
+                pos = match.end()
+                new_line = False
+                yield PythonToken(
+                    ERRORTOKEN, line[pos], (lnum, pos),
+                    additional_prefix + match.group(0)
+                )
+                additional_prefix = ''
+                pos += 1
+                continue
+
+            if (initial in numchars                      # ordinary number
+                    or (initial == '.' and token != '.' and token != '...')):
+                yield PythonToken(NUMBER, token, spos, prefix)
+            elif pseudomatch.group(3) is not None:            # ordinary name
+                if token in always_break_tokens and (fstring_stack or paren_level):
+                    fstring_stack[:] = []
+                    paren_level = 0
+                    # We only want to dedent if the token is on a new line.
+                    m = re.match(r'[ \f\t]*$', line[:start])
+                    if m is not None:
+                        yield from dedent_if_necessary(m.end())
+                if token.isidentifier():
+                    yield PythonToken(NAME, token, spos, prefix)
+                else:
+                    yield from _split_illegal_unicode_name(token, spos, prefix)
+            elif initial in '\r\n':
+                if any(not f.allow_multiline() for f in fstring_stack):
+                    fstring_stack.clear()
+
+                if not new_line and paren_level == 0 and not fstring_stack:
+                    yield PythonToken(NEWLINE, token, spos, prefix)
+                else:
+                    additional_prefix = prefix + token
+                new_line = True
+            elif initial == '#':  # Comments
+                assert not token.endswith("\n") and not token.endswith("\r")
+                if fstring_stack and fstring_stack[-1].is_in_expr():
+                    # `#` is not allowed in f-string expressions
+                    yield PythonToken(ERRORTOKEN, initial, spos, prefix)
+                    pos = start + 1
+                else:
+                    additional_prefix = prefix + token
+            elif token in triple_quoted:
+                endprog = endpats[token]
+                endmatch = endprog.match(line, pos)
+                if endmatch:                                # all on one line
+                    pos = endmatch.end(0)
+                    token = line[start:pos]
+                    yield PythonToken(STRING, token, spos, prefix)
+                else:
+                    contstr_start = spos                    # multiple lines
+                    contstr = line[start:]
+                    contline = line
+                    break
+
+            # Check up to the first 3 chars of the token to see if
+            #  they're in the single_quoted set. If so, they start
+            #  a string.
+            # We're using the first 3, because we're looking for
+            #  "rb'" (for example) at the start of the token. If
+            #  we switch to longer prefixes, this needs to be
+            #  adjusted.
+            # Note that initial == token[:1].
+            # Also note that single quote checking must come after
+            #  triple quote checking (above).
+            elif initial in single_quoted or \
+                    token[:2] in single_quoted or \
+                    token[:3] in single_quoted:
+                if token[-1] in '\r\n':                       # continued string
+                    # This means that a single quoted string ends with a
+                    # backslash and is continued.
+                    contstr_start = lnum, start
+                    endprog = (endpats.get(initial) or endpats.get(token[1])
+                               or endpats.get(token[2]))
+                    contstr = line[start:]
+                    contline = line
+                    break
+                else:                                       # ordinary string
+                    yield PythonToken(STRING, token, spos, prefix)
+            elif token in fstring_pattern_map:  # The start of an fstring.
+                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
+                yield PythonToken(FSTRING_START, token, spos, prefix)
+            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
+                additional_prefix += prefix + line[start:]
+                break
+            else:
+                if token in '([{':
+                    if fstring_stack:
+                        fstring_stack[-1].open_parentheses(token)
+                    else:
+                        paren_level += 1
+                elif token in ')]}':
+                    if fstring_stack:
+                        fstring_stack[-1].close_parentheses(token)
+                    else:
+                        if paren_level:
+                            paren_level -= 1
+                elif token.startswith(':') and fstring_stack \
+                        and fstring_stack[-1].parentheses_count \
+                        - fstring_stack[-1].format_spec_count == 1:
+                    # `:` and `:=` both count
+                    fstring_stack[-1].format_spec_count += 1
+                    token = ':'
+                    pos = start + 1
+
+                yield PythonToken(OP, token, spos, prefix)
+
+    if contstr:
+        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
+        if contstr.endswith('\n') or contstr.endswith('\r'):
+            new_line = True
+
+    if fstring_stack:
+        tos = fstring_stack[-1]
+        if tos.previous_lines:
+            yield PythonToken(
+                FSTRING_STRING, tos.previous_lines,
+                tos.last_string_start_pos,
+                # Never has a prefix because it can start anywhere and
+                # include whitespace.
+                prefix=''
+            )
+
+    end_pos = lnum, max_
+    # As the last position we just take the maximally possible position. We
+    # remove -1 for the last new line.
+    for indent in indents[1:]:
+        indents.pop()
+        yield PythonToken(DEDENT, '', end_pos, '')
+    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
+
+
+def _split_illegal_unicode_name(token, start_pos, prefix):
+    def create_token():
+        return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)
+
+    found = ''
+    is_illegal = False
+    pos = start_pos
+    for i, char in enumerate(token):
+        if is_illegal:
+            if char.isidentifier():
+                yield create_token()
+                found = char
+                is_illegal = False
+                prefix = ''
+                pos = start_pos[0], start_pos[1] + i
+            else:
+                found += char
+        else:
+            new_found = found + char
+            if new_found.isidentifier():
+                found = new_found
+            else:
+                if found:
+                    yield create_token()
+                    prefix = ''
+                    pos = start_pos[0], start_pos[1] + i
+                found = char
+                is_illegal = True
+
+    if found:
+        yield create_token()
+
+
+if __name__ == "__main__":
+    path = sys.argv[1]
+    with open(path) as f:
+        code = f.read()
+
+    for token in tokenize(code, version_info=parse_version_string('3.10')):
+        print(token)