256 lines
7.6 KiB
Python
256 lines
7.6 KiB
Python
# pyflyby/_idents.py.
|
|
# Copyright (C) 2011, 2012, 2013, 2014, 2018 Karl Chen.
|
|
# License: MIT http://opensource.org/licenses/MIT
|
|
|
|
from __future__ import (absolute_import, division, print_function,
|
|
with_statement)
|
|
|
|
from functools import total_ordering
|
|
from keyword import kwlist
|
|
import re
|
|
import six
|
|
|
|
from pyflyby._util import cached_attribute, cmp
|
|
|
|
|
|
# Don't consider "print" a keyword, in order to be compatible with user code
|
|
# that uses "from __future__ import print_function".
|
|
_my_kwlist = list(kwlist)
|
|
if six.PY2:
|
|
_my_kwlist.remove("print")
|
|
_my_iskeyword = frozenset(_my_kwlist).__contains__
|
|
|
|
|
|
# TODO: use DottedIdentifier.prefixes
|
|
def dotted_prefixes(dotted_name, reverse=False):
|
|
"""
|
|
Return the prefixes of a dotted name.
|
|
|
|
>>> dotted_prefixes("aa.bb.cc")
|
|
['aa', 'aa.bb', 'aa.bb.cc']
|
|
|
|
>>> dotted_prefixes("aa.bb.cc", reverse=True)
|
|
['aa.bb.cc', 'aa.bb', 'aa']
|
|
|
|
:type dotted_name:
|
|
``str``
|
|
:param reverse:
|
|
If False (default), return shortest to longest. If True, return longest
|
|
to shortest.
|
|
:rtype:
|
|
``list`` of ``str``
|
|
"""
|
|
name_parts = dotted_name.split(".")
|
|
if reverse:
|
|
idxes = range(len(name_parts), 0, -1)
|
|
else:
|
|
idxes = range(1, len(name_parts)+1)
|
|
result = ['.'.join(name_parts[:i]) or '.' for i in idxes]
|
|
return result
|
|
|
|
|
|
_name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
|
|
_dotted_name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*([.][a-zA-Z_][a-zA-Z0-9_]*)*$")
|
|
_dotted_name_prefix_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*([.][a-zA-Z_][a-zA-Z0-9_]*)*[.]?$")
|
|
|
|
|
|
def is_identifier(s, dotted=False, prefix=False):
|
|
"""
|
|
Return whether ``s`` is a valid Python identifier name.
|
|
|
|
>>> is_identifier("foo")
|
|
True
|
|
|
|
>>> is_identifier("foo+bar")
|
|
False
|
|
|
|
>>> is_identifier("from")
|
|
False
|
|
|
|
By default, we check whether ``s`` is a single valid identifier, meaning
|
|
dots are not allowed. If ``dotted=True``, then we check each dotted
|
|
component::
|
|
|
|
>>> is_identifier("foo.bar")
|
|
False
|
|
|
|
>>> is_identifier("foo.bar", dotted=True)
|
|
True
|
|
|
|
>>> is_identifier("foo..bar", dotted=True)
|
|
False
|
|
|
|
>>> is_identifier("foo.from", dotted=True)
|
|
False
|
|
|
|
By default, the string must comprise a valid identifier. If
|
|
``prefix=True``, then allow strings that are prefixes of valid identifiers.
|
|
Prefix=False excludes the empty string, strings with a trailing dot, and
|
|
strings with a trailing keyword component, but prefix=True does not
|
|
exclude these.
|
|
|
|
>>> is_identifier("foo.bar.", dotted=True)
|
|
False
|
|
|
|
>>> is_identifier("foo.bar.", dotted=True, prefix=True)
|
|
True
|
|
|
|
>>> is_identifier("foo.or", dotted=True)
|
|
False
|
|
|
|
>>> is_identifier("foo.or", dotted=True, prefix=True)
|
|
True
|
|
|
|
:type s:
|
|
``str``
|
|
:param dotted:
|
|
If ``False`` (default), then the input must be a single name such as
|
|
"foo". If ``True``, then the input can be a single name or a dotted name
|
|
such as "foo.bar.baz".
|
|
:param prefix:
|
|
If ``False`` (Default), then the input must be a valid identifier. If
|
|
``True``, then the input can be a valid identifier or the prefix of a
|
|
valid identifier.
|
|
:rtype:
|
|
``bool``
|
|
"""
|
|
if not isinstance(s, six.string_types):
|
|
raise TypeError("is_identifier(): expected a string; got a %s"
|
|
% (type(s).__name__,))
|
|
if six.PY3:
|
|
if prefix:
|
|
return is_identifier(s + '_', dotted=dotted, prefix=False)
|
|
if dotted:
|
|
return all(is_identifier(w, dotted=False) for w in s.split('.'))
|
|
return s.isidentifier() and not _my_iskeyword(s)
|
|
|
|
if prefix:
|
|
if not s:
|
|
return True
|
|
if dotted:
|
|
return bool(
|
|
_dotted_name_prefix_re.match(s) and
|
|
not any(_my_iskeyword(w) for w in s.split(".")[:-1]))
|
|
else:
|
|
return bool(_name_re.match(s))
|
|
else:
|
|
if dotted:
|
|
# Use a regular expression that works for dotted names. (As an
|
|
# alternate implementation, one could imagine calling
|
|
# all(is_identifier(w) for w in s.split(".")). We don't do that
|
|
# because s could be a long text string.)
|
|
return bool(
|
|
_dotted_name_re.match(s) and
|
|
not any(_my_iskeyword(w) for w in s.split(".")))
|
|
else:
|
|
return bool(_name_re.match(s) and not _my_iskeyword(s))
|
|
|
|
|
|
def brace_identifiers(text):
|
|
"""
|
|
Parse a string and yield all tokens of the form "{some_token}".
|
|
|
|
>>> list(brace_identifiers("{salutation}, {your_name}."))
|
|
['salutation', 'your_name']
|
|
"""
|
|
if isinstance(text, bytes):
|
|
text = text.decode('utf-8', errors='replace')
|
|
for match in re.finditer("{([a-zA-Z_][a-zA-Z0-9_]*)}", text):
|
|
yield match.group(1)
|
|
|
|
|
|
class BadDottedIdentifierError(ValueError):
|
|
pass
|
|
|
|
|
|
# TODO: Use in various places, esp where e.g. dotted_prefixes is used.
|
|
@total_ordering
|
|
class DottedIdentifier(object):
|
|
def __new__(cls, arg):
|
|
if isinstance(arg, cls):
|
|
return arg
|
|
if isinstance(arg, six.string_types):
|
|
return cls._from_name(arg)
|
|
if isinstance(arg, (tuple, list)):
|
|
return cls._from_name(".".join(arg))
|
|
raise TypeError("DottedIdentifier: unexpected %s"
|
|
% (type(arg).__name__,))
|
|
|
|
@classmethod
|
|
def _from_name(cls, name):
|
|
self = object.__new__(cls)
|
|
self.name = str(name)
|
|
if not is_identifier(self.name, dotted=True):
|
|
if len(self.name) > 20:
|
|
raise BadDottedIdentifierError("Invalid python symbol name")
|
|
else:
|
|
raise BadDottedIdentifierError("Invalid python symbol name %r"
|
|
% (name,))
|
|
self.parts = tuple(self.name.split('.'))
|
|
return self
|
|
|
|
@cached_attribute
|
|
def parent(self):
|
|
if len(self.parts) > 1:
|
|
return DottedIdentifier('.'.join(self.parts[:-1]))
|
|
else:
|
|
return None
|
|
|
|
@cached_attribute
|
|
def prefixes(self):
|
|
parts = self.parts
|
|
idxes = range(1, len(parts)+1)
|
|
result = ['.'.join(parts[:i]) for i in idxes]
|
|
return tuple(DottedIdentifier(x) for x in result)
|
|
|
|
def startswith(self, o):
|
|
o = type(self)(o)
|
|
return self.parts[:len(o.parts)] == o.parts
|
|
|
|
def __getitem__(self, x):
|
|
return type(self)(self.parts[x])
|
|
|
|
def __len__(self):
|
|
return len(self.parts)
|
|
|
|
def __iter__(self):
|
|
return (type(self)(x) for x in self.parts)
|
|
|
|
def __add__(self, suffix):
|
|
return type(self)("%s.%s") % (self, suffix)
|
|
|
|
def __str__(self):
|
|
return self.name
|
|
|
|
def __repr__(self):
|
|
return "%s(%r)" % (type(self).__name__, self.name)
|
|
|
|
def __hash__(self):
|
|
return hash(self.name)
|
|
|
|
def __eq__(self, other):
|
|
if self is other:
|
|
return True
|
|
if not isinstance(other, DottedIdentifier):
|
|
return NotImplemented
|
|
return self.name == other.name
|
|
|
|
def __ne__(self, other):
|
|
if self is other:
|
|
return False
|
|
if not isinstance(other, DottedIdentifier):
|
|
return NotImplemented
|
|
return self.name != other.name
|
|
|
|
# The rest are defined by total_ordering
|
|
def __lt__(self, other):
|
|
if not isinstance(other, DottedIdentifier):
|
|
return NotImplemented
|
|
return self.name < other.name
|
|
|
|
def __cmp__(self, other):
|
|
if self is other:
|
|
return 0
|
|
if not isinstance(other, DottedIdentifier):
|
|
return NotImplemented
|
|
return cmp(self.name, other.name)
|