This commit is contained in:
Waylon Walker 2022-03-31 20:20:07 -05:00
commit 38355d2442
No known key found for this signature in database
GPG key ID: 66E2BF2B4190EFE4
9083 changed files with 1225834 additions and 0 deletions

View file

@ -0,0 +1,256 @@
# pyflyby/_idents.py.
# Copyright (C) 2011, 2012, 2013, 2014, 2018 Karl Chen.
# License: MIT http://opensource.org/licenses/MIT
from __future__ import (absolute_import, division, print_function,
with_statement)
from functools import total_ordering
from keyword import kwlist
import re
import six
from pyflyby._util import cached_attribute, cmp
# "print" is deliberately left out of our keyword list on Python 2 so that
# user code relying on "from __future__ import print_function" can still use
# it as an ordinary name.  On Python 3 the list is just a copy of ``kwlist``.
_my_kwlist = [kw for kw in kwlist if not (six.PY2 and kw == "print")]
# Membership predicate: ``_my_iskeyword(word)`` is True iff ``word`` is one of
# the keywords kept above.
_my_iskeyword = frozenset(_my_kwlist).__contains__
# TODO: use DottedIdentifier.prefixes
def dotted_prefixes(dotted_name, reverse=False):
    """
    List every leading sub-name of a dotted name.

      >>> dotted_prefixes("aa.bb.cc")
      ['aa', 'aa.bb', 'aa.bb.cc']

      >>> dotted_prefixes("aa.bb.cc", reverse=True)
      ['aa.bb.cc', 'aa.bb', 'aa']

    A prefix that would otherwise be the empty string (e.g. the first prefix
    of a name starting with a dot) is reported as ``'.'``.

    :type dotted_name:
      ``str``
    :param reverse:
      If false (default), order the result shortest to longest.  If true,
      order it longest to shortest.
    :rtype:
      ``list`` of ``str``
    """
    components = dotted_name.split(".")
    prefixes = []
    for end in range(1, len(components) + 1):
        joined = '.'.join(components[:end])
        # An empty join result is replaced by a lone dot.
        prefixes.append(joined or '.')
    if reverse:
        prefixes.reverse()
    return prefixes
# Patterns used with ``.match()`` by the regex-based (Python 2) branch of
# is_identifier().  They are anchored at the end with ``\Z`` rather than ``$``:
# ``$`` also matches just before a trailing newline, which would let a string
# such as "foo\n" pass as an identifier.

# A single ASCII identifier, e.g. "foo".
_name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*\Z")
# One or more ASCII identifiers separated by single dots, e.g. "foo.bar.baz".
_dotted_name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*([.][a-zA-Z_][a-zA-Z0-9_]*)*\Z")
# Same as _dotted_name_re, but additionally allows one trailing dot, e.g.
# "foo.bar." (a dotted name that is still being typed).
_dotted_name_prefix_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*([.][a-zA-Z_][a-zA-Z0-9_]*)*[.]?\Z")
def is_identifier(s, dotted=False, prefix=False):
    """
    Tell whether ``s`` can be used as a Python identifier.

      >>> is_identifier("foo")
      True

      >>> is_identifier("foo+bar")
      False

      >>> is_identifier("from")
      False

    Unless ``dotted=True`` is given, ``s`` has to be one plain name; dots are
    rejected.  With ``dotted=True`` every dot-separated component is checked::

      >>> is_identifier("foo.bar")
      False

      >>> is_identifier("foo.bar", dotted=True)
      True

      >>> is_identifier("foo..bar", dotted=True)
      False

      >>> is_identifier("foo.from", dotted=True)
      False

    Unless ``prefix=True`` is given, ``s`` has to be a complete identifier.
    With ``prefix=True`` it only has to be the beginning of one, so the empty
    string, a trailing dot, and a keyword as the last component are all
    accepted::

      >>> is_identifier("foo.bar.", dotted=True)
      False

      >>> is_identifier("foo.bar.", dotted=True, prefix=True)
      True

      >>> is_identifier("foo.or", dotted=True)
      False

      >>> is_identifier("foo.or", dotted=True, prefix=True)
      True

    :type s:
      ``str``
    :param dotted:
      If ``False`` (default), accept only a single name such as "foo".  If
      ``True``, accept a single name or a dotted name such as "foo.bar.baz".
    :param prefix:
      If ``False`` (default), require a valid identifier.  If ``True``, accept
      a valid identifier or anything that is the prefix of one.
    :rtype:
      ``bool``
    :raise TypeError:
      ``s`` is not a string.
    """
    if not isinstance(s, six.string_types):
        raise TypeError("is_identifier(): expected a string; got a %s"
                        % (type(s).__name__,))

    if six.PY3:
        if prefix:
            # Complete the (possibly unfinished) last component with "_" and
            # validate the result as a full identifier.
            completed = s + '_'
            return is_identifier(completed, dotted=dotted, prefix=False)
        if not dotted:
            return s.isidentifier() and not _my_iskeyword(s)
        for component in s.split('.'):
            if not is_identifier(component, dotted=False):
                return False
        return True

    # Regex-based implementation for everything else (i.e. Python 2).
    if prefix and not s:
        # The empty string is a prefix of every identifier.
        return True

    if not dotted:
        if not _name_re.match(s):
            return False
        if prefix:
            # A keyword can still be the start of a longer, valid name.
            return True
        return not _my_iskeyword(s)

    # Dotted name: test the whole string with one regular expression first,
    # and split into components only once it matched, because ``s`` could be
    # a long text string.
    pattern = _dotted_name_prefix_re if prefix else _dotted_name_re
    if not pattern.match(s):
        return False
    components = s.split(".")
    if prefix:
        # The last component may be incomplete, so it is exempt from the
        # keyword check.
        components = components[:-1]
    return not any(_my_iskeyword(component) for component in components)
def brace_identifiers(text):
    """
    Generate each identifier that appears in ``text`` wrapped in curly braces,
    i.e. in the form "{some_token}", in order of appearance.

      >>> list(brace_identifiers("{salutation}, {your_name}."))
      ['salutation', 'your_name']

    ``bytes`` input is decoded as UTF-8 first; undecodable bytes are replaced
    rather than raising.
    """
    if isinstance(text, bytes):
        text = text.decode('utf-8', errors='replace')
    token_pattern = re.compile("{([a-zA-Z_][a-zA-Z0-9_]*)}")
    for found in token_pattern.finditer(text):
        # Group 1 is the identifier without the surrounding braces.
        yield found.group(1)
class BadDottedIdentifierError(ValueError):
    """
    Raised when a string is not a valid (possibly dotted) Python identifier.
    """
# TODO: Use in various places, esp where e.g. dotted_prefixes is used.
@total_ordering
class DottedIdentifier(object):
    """
    A validated dotted Python name such as ``foo.bar.baz``.

    Every instance has two attributes, set at construction time:

      ``name``
        The full dotted name, as a ``str``.
      ``parts``
        ``tuple`` of the dot-separated components of ``name``.

    Equality, hashing and ordering are all based on ``name``.

      >>> DottedIdentifier("foo.bar").parts
      ('foo', 'bar')

      >>> DottedIdentifier("foo") + "bar"
      DottedIdentifier('foo.bar')
    """

    def __new__(cls, arg):
        """
        Build a ``DottedIdentifier``.

        :param arg:
          An existing instance of ``cls`` (returned unchanged), a string, or
          a ``tuple``/``list`` of component strings (joined with ".").
        :raise TypeError:
          ``arg`` is of any other type.
        :raise BadDottedIdentifierError:
          The resulting name is not a valid dotted identifier.
        """
        if isinstance(arg, cls):
            return arg
        if isinstance(arg, six.string_types):
            return cls._from_name(arg)
        if isinstance(arg, (tuple, list)):
            return cls._from_name(".".join(arg))
        raise TypeError("DottedIdentifier: unexpected %s"
                        % (type(arg).__name__,))

    @classmethod
    def _from_name(cls, name):
        """
        Create an instance from a dotted-name string, validating it with
        ``is_identifier(..., dotted=True)``.

        :raise BadDottedIdentifierError:
          ``name`` is not a valid dotted identifier.
        """
        self = object.__new__(cls)
        self.name = str(name)
        if not is_identifier(self.name, dotted=True):
            # Only echo the offending name back when it is short; longer
            # input is left out of the message.
            if len(self.name) > 20:
                raise BadDottedIdentifierError("Invalid python symbol name")
            else:
                raise BadDottedIdentifierError("Invalid python symbol name %r"
                                               % (name,))
        self.parts = tuple(self.name.split('.'))
        return self

    # NOTE(review): ``cached_attribute`` comes from pyflyby._util and is not
    # visible here; presumably it computes the value once per instance and
    # exposes it as an attribute -- confirm.
    @cached_attribute
    def parent(self):
        """
        The identifier with the last component removed, or ``None`` if there
        is only one component.
        """
        if len(self.parts) > 1:
            return DottedIdentifier('.'.join(self.parts[:-1]))
        else:
            return None

    @cached_attribute
    def prefixes(self):
        """
        ``tuple`` of ``DottedIdentifier`` for every leading sub-name, from
        the first component alone up to the full name.
        """
        parts = self.parts
        idxes = range(1, len(parts)+1)
        result = ['.'.join(parts[:i]) for i in idxes]
        return tuple(DottedIdentifier(x) for x in result)

    def startswith(self, o):
        """
        Return whether the leading components of ``self`` equal the
        components of ``o``.  Comparison is per component, not per character.

        :param o:
          Anything accepted by the constructor.
        """
        o = type(self)(o)
        return self.parts[:len(o.parts)] == o.parts

    def __getitem__(self, x):
        """
        Index or slice the components, returning a new identifier made of
        the selected component(s).
        """
        return type(self)(self.parts[x])

    def __len__(self):
        """Number of dot-separated components."""
        return len(self.parts)

    def __iter__(self):
        """Iterate over the components, each as a one-part identifier."""
        return (type(self)(x) for x in self.parts)

    def __add__(self, suffix):
        """
        Return a new identifier named ``"<self>.<suffix>"``.

        :param suffix:
          A ``DottedIdentifier`` or string to append after a dot.
        :raise BadDottedIdentifierError:
          The combined name is not a valid dotted identifier.
        """
        # Format first, then construct.  (Constructing from the raw template
        # "%s.%s" and formatting afterwards always failed validation.)
        return type(self)("%s.%s" % (self, suffix))

    def __str__(self):
        return self.name

    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, self.name)

    def __hash__(self):
        # Consistent with __eq__, which compares ``name``.
        return hash(self.name)

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, DottedIdentifier):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        if self is other:
            return False
        if not isinstance(other, DottedIdentifier):
            return NotImplemented
        return self.name != other.name

    # The rest are defined by total_ordering
    def __lt__(self, other):
        if not isinstance(other, DottedIdentifier):
            return NotImplemented
        return self.name < other.name

    def __cmp__(self, other):
        # Three-way comparison on ``name`` using the ``cmp`` helper imported
        # from pyflyby._util.
        if self is other:
            return 0
        if not isinstance(other, DottedIdentifier):
            return NotImplemented
        return cmp(self.name, other.name)