creeper-adventure/.venv/lib/python3.8/site-packages/pyflyby/_parse.py
2022-03-31 20:20:07 -05:00

1482 lines
55 KiB
Python

# pyflyby/_parse.py.
# Copyright (C) 2011, 2012, 2013, 2014, 2015, 2018 Karl Chen.
# License: MIT http://opensource.org/licenses/MIT
from __future__ import (absolute_import, division, print_function,
with_statement)
import ast
from collections import namedtuple
from doctest import DocTestParser
from functools import total_ordering
from itertools import groupby
import re
import sys
from textwrap import dedent
import types
import six
from six import PY2, PY3, text_type as unicode
from six.moves import range
from pyflyby._file import FilePos, FileText, Filename
from pyflyby._flags import CompilerFlags
from pyflyby._log import logger
from pyflyby._util import cached_attribute, cmp
# ``ast.Bytes`` exists only on Python 3; on Python 2 a bytes literal parses
# to ``ast.Str``, so alias it for uniform isinstance checks below.
if PY3:
    from ast import Bytes
else:
    Bytes = ast.Str

if sys.version_info >= (3, 8):
    from ast import TypeIgnore
else:
    # TypeIgnore does not exist on Python 3.7 and before.
    # Thus we define a dummy TypeIgnore just to simplify the remaining code.
    class TypeIgnore:
        pass
def _is_comment_or_blank(line):
"""
Returns whether a line of python code contains only a comment is blank.
>>> _is_comment_or_blank("foo\\n")
False
>>> _is_comment_or_blank(" # blah\\n")
True
"""
return re.sub("#.*", "", line).rstrip() == ""
def _ast_str_literal_value(node):
    """
    Return the literal value of ``node`` if it is a string/bytes literal, or
    an expression statement wrapping one; otherwise return ``None``.
    """
    # Unwrap a bare expression statement so that a standalone string
    # statement (e.g. a docstring) also counts.
    if isinstance(node, ast.Expr):
        node = node.value
    if isinstance(node, (ast.Str, Bytes)):
        return node.s
    return None
def _flatten_ast_nodes(arg):
if arg is None:
pass
elif isinstance(arg, ast.AST):
yield arg
elif isinstance(arg, str):
#FunctionDef type_comments
yield arg
elif isinstance(arg, (tuple, list, types.GeneratorType)):
for x in arg:
for y in _flatten_ast_nodes(x):
yield y
else:
raise TypeError(
"_flatten_ast_nodes: unexpected %s" % (type(arg).__name__,))
def _iter_child_nodes_in_order(node):
    """
    Yield every direct child node of ``node`` -- each field that is a node,
    plus each node inside list-valued fields -- in source order.

    ``ast.iter_child_nodes`` yields the same children but not necessarily in
    source order; e.g. for ``Dict`` it yields all key nodes before all value
    nodes.
    """
    # The per-node-type ordering lives in the internal helper, which yields
    # possibly nested groups; flatten them into individual nodes here.
    for child in _flatten_ast_nodes(_iter_child_nodes_in_order_internal_1(node)):
        yield child
def _iter_child_nodes_in_order_internal_1(node):
    # Yield the children of ``node`` grouped so that, once flattened by
    # `_flatten_ast_nodes`, they come out in source order.  Each yielded item
    # may be a node, a string (type comment), or a nested list/tuple of such
    # items.
    if isinstance(node, str):
        # This happens for type comments, which are plain strings rather than
        # AST nodes; they have no children, so yield an empty group.
        yield []
        return
    if not isinstance(node, ast.AST):
        raise TypeError
    if isinstance(node, ast.Dict):
        # ``ast.Dict`` stores all keys then all values; interleave them so
        # that key1, value1, key2, value2, ... matches source order.
        assert node._fields == ("keys", "values")
        yield list(zip(node.keys, node.values))
    elif isinstance(node, ast.FunctionDef):
        # Decorators precede the ``def`` line in the source, so yield them
        # first.  The field set varies by Python version, hence the asserts.
        if six.PY2:
            assert node._fields == ('name', 'args', 'body', 'decorator_list'), node._fields
            yield node.decorator_list, node.args, node.body
        elif sys.version_info >= (3, 8):
            assert node._fields == (
                "name",
                "args",
                "body",
                "decorator_list",
                "returns",
                "type_comment",
            ), node._fields
            res = (
                node.type_comment,
                node.decorator_list,
                node.args,
                node.returns,
                node.body,
            )
            yield res
        else:
            assert node._fields == ('name', 'args', 'body', 'decorator_list',
                                    'returns'), node._fields
            yield node.decorator_list, node.args, node.returns, node.body
        # node.name is a string, not an AST node
    elif isinstance(node, ast.arguments):
        if six.PY2:
            assert node._fields == ('args', 'vararg', 'kwarg', 'defaults'), node._fields
        elif sys.version_info >= (3, 8):
            assert node._fields == ('posonlyargs', 'args', 'vararg', 'kwonlyargs',
                                    'kw_defaults', 'kwarg', 'defaults'), node._fields
        else:
            assert node._fields == ('args', 'vararg', 'kwonlyargs',
                                    'kw_defaults', 'kwarg', 'defaults'), node._fields
        # ``defaults`` aligns with the *last* len(defaults) positional args;
        # pair each defaulted arg with its default so that the pairs flatten
        # in source order.
        defaults = node.defaults or ()
        num_no_default = len(node.args)-len(defaults)
        yield node.args[:num_no_default]
        yield list(zip(node.args[num_no_default:], defaults))
        # node.vararg and node.kwarg are strings, not AST nodes.
    elif isinstance(node, ast.IfExp):
        # Source order of ``body if test else orelse`` is body, test, orelse.
        assert node._fields == ('test', 'body', 'orelse')
        yield node.body, node.test, node.orelse
    elif isinstance(node, ast.Call):
        # Call argument order is lost by ast (positional and keyword
        # arguments are stored in separate lists); re-order them by their
        # (lineno, col_offset) source position.
        yield node.func
        args = sorted([(k.value.lineno, k.value.col_offset, k) for k in node.keywords]+
                      [(k.lineno,k.col_offset, k) for k in node.args])
        yield [a[2] for a in args]
    elif isinstance(node, ast.ClassDef):
        if six.PY2:
            assert node._fields == ('name', 'bases', 'body', 'decorator_list')
        else:
            assert node._fields == ('name', 'bases', 'keywords', 'body', 'decorator_list')
        # Decorators precede the ``class`` line in the source.
        yield node.decorator_list, node.bases, node.body
        # node.name is a string, not an AST node
    elif sys.version_info >= (3, 7) and isinstance(node, ast.FormattedValue):
        # Only the interpolated value is yielded; ``conversion`` is an int,
        # and ``format_spec`` is not yielded here.
        assert node._fields == ('value', 'conversion', 'format_spec')
        yield node.value,
    else:
        # Default behavior: ast's own child iteration is already in source
        # order for the remaining node types.
        yield ast.iter_child_nodes(node)
def _walk_ast_nodes_in_order(node):
    """
    Recursively yield ``node`` and all of its descendants, in the same order
    that the nodes appear in the source.

    ``ast.walk`` yields the same set of nodes but in an arbitrary
    (breadth-first) order; this is a depth-first, source-ordered walk built
    on `_iter_child_nodes_in_order`.
    """
    stack = [node]
    while stack:
        current = stack.pop()
        yield current
        # Push children in reverse so the leftmost child is popped first.
        children = list(_iter_child_nodes_in_order(current))
        children.reverse()
        stack.extend(children)
def _flags_to_try(source, flags, auto_flags, mode):
    """
    Yield the compiler-flag sets to attempt when parsing ``source``.

    If ``auto_flags`` is false, only ``flags`` itself is yielded.  If
    ``auto_flags`` is true (Python 2 only), ``flags ^ print_function`` may
    also be yielded.
    """
    base = CompilerFlags(flags)
    if sys.version_info >= (3, 8):
        # Python 3.8+: turn on type-comment parsing when a "# type:" comment
        # appears in the source; no other guessing is needed.
        if re.search(r"# *type:", source):
            base = base | CompilerFlags('type_comments')
        yield base
    elif not auto_flags or PY3:
        # No guessing requested, or Python 3 (< 3.8): just the given flags.
        yield base
    else:
        # Python 2 with auto_flags: consider toggling print_function.
        mentions_print = re.search(r"\bprint\b", source) is not None
        if mode == "eval":
            # ``print`` can only parse in eval mode with print_function on.
            if mentions_print:
                base = base | CompilerFlags("print_function")
            yield base
        else:
            yield base
            if mentions_print:
                yield base ^ CompilerFlags("print_function")
def _parse_ast_nodes(text, flags, auto_flags, mode):
    """
    Parse a block of lines into an AST.

    Also annotate ``input_flags``, ``source_flags``, and ``flags`` on the
    resulting ast node.

    :type text:
      ``FileText``
    :type flags:
      ``CompilerFlags``
    :type auto_flags:
      ``bool``
    :param auto_flags:
      Whether to guess different flags if ``text`` can't be parsed with
      ``flags``.
    :param mode:
      Compilation mode: "exec", "single", or "eval".
    :rtype:
      ``ast.Module``
    :raise SyntaxError:
      If the source cannot be parsed under any candidate flag set.
    """
    text = FileText(text)
    filename = str(text.filename) if text.filename else "<unknown>"
    source = text.joined
    source = dedent(source)
    if PY2 and isinstance(source, unicode):
        # Python 2's compile() rejects unicode source in some cases; use a
        # utf-8 byte string instead.
        source = source.encode('utf-8')
    if not source.endswith("\n"):
        # Ensure that the last line ends with a newline (``ast`` barfs
        # otherwise).
        source += "\n"
    last_syntax_error = None
    for candidate_flags in _flags_to_try(source, flags, auto_flags, mode):
        cflags = ast.PyCF_ONLY_AST | int(candidate_flags)
        try:
            result = compile(
                source, filename, mode, flags=cflags, dont_inherit=True)
        except SyntaxError as e:
            # Remember the failure and try the next candidate flag set.
            last_syntax_error = e
        else:
            # Parse succeeded: attach flag and text metadata to the result.
            result.input_flags = candidate_flags
            result.source_flags = CompilerFlags.from_ast(result)
            result.flags = result.input_flags | result.source_flags
            result.text = text
            return result
    # Every candidate flag set failed; re-raise the last SyntaxError.
    # (_flags_to_try always yields at least once, so this is never None.)
    raise last_syntax_error
def _test_parse_string_literal(text, flags):
    r"""
    Attempt to parse ``text``.  If it parses cleanly to a single string
    literal, return its value.  Otherwise return ``None``.

      >>> _test_parse_string_literal(r'"foo\n" r"\nbar"', None)
      'foo\n\\nbar'

    """
    text = FileText(text)
    if PY2:
        # On Python 2, non-ascii source needs an explicit coding declaration
        # prepended before it can be compiled.
        try:
            text.joined.encode('ascii')
        except UnicodeError:
            text = FileText(u'# encoding: utf-8\n' + unicode(text),
                            filename=text.filename)
    try:
        module_node = _parse_ast_nodes(text, flags, False, "eval")
    except SyntaxError:
        return None
    body = module_node.body
    # In "eval" mode the module body is a single expression node; only a
    # plain string/bytes literal qualifies.
    if isinstance(body, (ast.Str, Bytes)):
        return body.s
    return None
# Location of an AST node within its parent node:
# ``parent.<field>[<index>] is node`` (``index`` is None for non-list fields).
AstNodeContext = namedtuple("AstNodeContext", "parent field index")
def _annotate_ast_nodes(ast_node):
    """
    Annotate AST with:
      - startpos and endpos
      - [disabled for now: context as `AstNodeContext` ]

    :type ast_node:
      ``ast.AST``
    :param ast_node:
      AST node returned by `_parse_ast_nodes`
    :return:
      ``None``
    """
    # ``text`` and ``flags`` were attached to the root node by
    # _parse_ast_nodes().
    text = ast_node.text
    flags = ast_node.flags
    startpos = text.startpos
    _annotate_ast_startpos(ast_node, None, startpos, text, flags)
    # Not used for now:
    #   ast_node.context = AstNodeContext(None, None, None)
    #   _annotate_ast_context(ast_node)
def _annotate_ast_startpos(ast_node, parent_ast_node, minpos, text, flags):
    r"""
    Annotate ``ast_node``.  Set ``ast_node.startpos`` to the starting position
    of the node within ``text``.

    For "typical" nodes, i.e. those other than multiline strings, this is
    simply FilePos(ast_node.lineno, ast_node.col_offset+1), but taking
    ``text.startpos`` into account.

    For multiline string nodes, this function works by trying to parse all
    possible subranges of lines until finding the range that is syntactically
    valid and matches ``value``.  The candidate range is
    text[min_start_lineno:lineno+text.startpos.lineno+1].

    This function is unfortunately necessary because of a flaw in the output
    produced by the Python built-in parser.  For some crazy reason, the
    ``ast_node.lineno`` attribute represents something different for multiline
    string literals versus all other statements.  For multiline string literal
    nodes and statements that are just a string expression (or more generally,
    nodes where the first descendant leaf node is a multiline string literal),
    the compiler attaches the ending line number as the value of the ``lineno``
    attribute.  For all other AST nodes, the compiler attaches the starting
    line number as the value of the ``lineno`` attribute.  This means e.g. the
    statement "'''foo\nbar'''" has a lineno value of 2, but the statement
    "x='''foo\nbar'''" has a lineno value of 1.

    :type ast_node:
      ``ast.AST``
    :type minpos:
      ``FilePos``
    :param minpos:
      Earliest position to check, in the number space of ``text``.
    :type text:
      ``FileText``
    :param text:
      Source text that was used to parse the AST, whose ``startpos`` should be
      used in interpreting ``ast_node.lineno`` (which always starts at 1 for
      the subset that was parsed).
    :type flags:
      ``CompilerFlags``
    :param flags:
      Compiler flags to use when re-compiling code.
    :return:
      ``True`` if this node is a multiline string literal or the first child is
      such a node (recursively); ``False`` otherwise.
    :raise ValueError:
      Could not find the starting line number.
    """
    assert isinstance(ast_node, (ast.AST, str, TypeIgnore)), ast_node
    # Joined strings (f-strings) and their children do not carry a column
    # offset on Python 3.7; this prevents reformatting.  Copy the column
    # offset from the parent node in that case.
    if (3, 7) < sys.version_info < (3, 8):
        if (
            isinstance(ast_node, (getattr(ast, "JoinedStr", None), ast.FormattedValue))
            or isinstance(
                parent_ast_node, (getattr(ast, "JoinedStr", None), ast.FormattedValue)
            )
        ) and ast_node.col_offset == -1:
            ast_node.col_offset = parent_ast_node.col_offset
    # First, traverse child nodes.  If the first child node (recursively) is a
    # multiline string, then we need to transfer its information to this node.
    # Walk all nodes/fields of the AST.  We implement this as a custom
    # depth-first search instead of using ast.walk() or ast.NodeVisitor
    # so that we can easily keep track of the preceding node's lineno.
    child_minpos = minpos
    is_first_child = True
    leftstr_node = None
    for child_node in _iter_child_nodes_in_order(ast_node):
        leftstr = _annotate_ast_startpos(child_node, ast_node,
                                         child_minpos, text, flags)
        if is_first_child and leftstr:
            leftstr_node = child_node
        if hasattr(child_node, 'lineno') and not isinstance(child_node, TypeIgnore):
            # Children must come out in nondecreasing source position; a
            # violation means _iter_child_nodes_in_order mis-ordered them.
            if child_node.startpos < child_minpos:
                raise AssertionError(
                    "Got out-of-order AST node(s):\n"
                    " parent minpos=%s\n" % minpos +
                    " node: %s\n" % ast.dump(ast_node) +
                    " fields: %s\n" % (" ".join(ast_node._fields)) +
                    " children:\n" +
                    ''.join(
                        " %s %9s: %s\n" % (
                            ("==>" if cn is child_node else " "),
                            getattr(cn, 'startpos', ""),
                            ast.dump(cn))
                        for cn in _iter_child_nodes_in_order(ast_node)) +
                    "\n"
                    "This indicates a bug in pyflyby._\n"
                    "\n"
                    "pyflyby developer: Check if there's a bug or missing ast node handler in "
                    "pyflyby._parse._iter_child_nodes_in_order() - "
                    "probably the handler for ast.%s." % type(ast_node).__name__)
            child_minpos = child_node.startpos
        is_first_child = False
    # If the node has no lineno at all, then skip it.  This should only happen
    # for nodes we don't care about, e.g. ``ast.Module`` or ``ast.alias``.
    if not hasattr(ast_node, 'lineno') or isinstance(ast_node, TypeIgnore):
        return False
    # If col_offset is set then the lineno should be correct also.
    if ast_node.col_offset >= 0:
        # In Python 3.8+, FunctionDef.lineno is the line with the def.  To
        # account for decorators, we need the lineno of the first decorator.
        if (sys.version_info >= (3, 8)
                and isinstance(ast_node, (ast.FunctionDef, ast.ClassDef))
                and ast_node.decorator_list):
            delta = (ast_node.decorator_list[0].lineno-1,
                     # The col_offset doesn't include the @
                     ast_node.decorator_list[0].col_offset - 1)
        else:
            delta = (ast_node.lineno-1, ast_node.col_offset)
        # Not a multiline string literal.  (I.e., it could be a non-string or
        # a single-line string.)
        # Easy.
        startpos = text.startpos + delta
        # Special case for 'with' statements.  Consider the code:
        #    with X: pass
        #    ^0   ^5
        # In python2.6, col_offset is 0.
        # In python2.7, col_offset is 5.
        # This is because python2.7 allows for multiple clauses:
        #    with X, Y: pass
        # Since 'Y's col_offset isn't the beginning of the line, the authors
        # of Python presumably changed 'X's col_offset to also not be the
        # beginning of the line.  If they had made the With ast node support
        # multiple clauses, they wouldn't have needed to do that, but then
        # that would introduce an API change in the AST.  So it's
        # understandable that they did that.
        # Since we use startpos for breaking lines, we need to set startpos to
        # the beginning of the line.
        # In Python 3, the col_offset for the with is 0 again.
        if (isinstance(ast_node, ast.With) and
                not isinstance(parent_ast_node, ast.With) and
                sys.version_info[:2] == (2,7)):
            assert ast_node.col_offset >= 5
            if startpos.lineno == text.startpos.lineno:
                linestart = text.startpos.colno
            else:
                linestart = 1
            line = text[(startpos.lineno,linestart):startpos]
            m = re.search(r"\bwith\s+$", str(line))
            assert m
            lk = len(m.group()) # length of 'with ' including spaces
            startpos = FilePos(startpos.lineno, startpos.colno - lk)
            assert str(text[startpos:(startpos+(0,4))]) == "with"
        ast_node.startpos = startpos
        # NOTE(review): ``sys.version_info <= (3, 8)`` compares the full
        # 5-tuple against a 2-tuple, so e.g. (3, 8, 10) is *greater* than
        # (3, 8); in practice this branch applies only on Python < 3.8.
        # Confirm that this is the intended cutoff.
        if sys.version_info <= (3, 8):
            ast_node.startpos = max(startpos, minpos)
        return False
    assert ast_node.col_offset == -1
    if leftstr_node:
        # This is an ast node where the leftmost deepest leaf is a
        # multiline string.  The bug that multiline strings have broken
        # lineno/col_offset infects ancestors up the tree.
        #
        # If the leftmost leaf is a multi-line string, then ``lineno``
        # contains the ending line number, and col_offset is -1:
        #   >>> ast.parse("""'''foo\nbar'''+blah""").body[0].lineno
        #   2
        # But if the leftmost leaf is not a multi-line string, then
        # ``lineno`` contains the starting line number:
        #   >>> ast.parse("""'''foobar'''+blah""").body[0].lineno
        #   1
        #   >>> ast.parse("""blah+'''foo\nbar'''+blah""").body[0].lineno
        #   1
        #
        # To fix that, we copy start_lineno and start_colno from the Str
        # node once we've corrected the values.
        assert not isinstance(ast_node, (ast.Str, Bytes))
        assert leftstr_node.lineno == ast_node.lineno
        assert leftstr_node.col_offset == -1
        ast_node.startpos = leftstr_node.startpos
        return True
    # It should now be the case that we are looking at a multi-line string
    # literal.
    if sys.version_info >= (3, 7) and isinstance(ast_node, ast.FormattedValue):
        # An f-string interpolation: inherit positions from the value node.
        ast_node.startpos = ast_node.value.startpos
        # NOTE(review): endpos is assigned from ``value.startpos``; if an end
        # position was intended here, this should presumably be
        # ``ast_node.value.endpos`` -- confirm.
        ast_node.endpos = ast_node.value.startpos
        return True
    if not isinstance(ast_node, (ast.Str, Bytes)):
        raise ValueError(
            "got a non-string col_offset=-1: %s" % (ast.dump(ast_node)))
    # The ``lineno`` attribute gives the ending line number of the multiline
    # string ... unless it's multiple multiline strings that are concatenated
    # by adjacency, in which case it's merely the end of the first one of
    # them.  At least we know that the start lineno is definitely not later
    # than the ``lineno`` attribute.
    first_end_lineno = text.startpos.lineno + ast_node.lineno - 1
    # Compute possible start positions.
    # The starting line number of this string could be anywhere between the
    # end of the previous expression and ``first_end_lineno``.
    startpos_candidates = []
    assert minpos.lineno <= first_end_lineno
    for start_lineno in range(minpos.lineno, first_end_lineno + 1):
        start_line = text[start_lineno]
        start_line_colno = (text.startpos.colno
                            if start_lineno==text.startpos.lineno else 1)
        # Every quote character (with optional prefix letters) on the line is
        # a potential opening quote.
        startpos_candidates.extend([
            (_m.group()[-1], FilePos(start_lineno, _m.start()+start_line_colno))
            for _m in re.finditer("[bBrRuU]*[\"\']", start_line)])
    target_str = ast_node.s
    if isinstance(target_str, bytes) and sys.version_info[:2] == (3, 7):
        target_str = target_str.decode()
    # Loop over possible end_linenos.  The first one we've identified is the
    # by far most likely one, but in theory it could be anywhere later in the
    # file.  This could be because of a dastardly concatenated string like
    # this:
    #     """    # L1
    #     two    # L2
    #     """ """ # L3
    #     four   # L4
    #     five   # L5
    #     six    # L6
    #     """    # L7
    # There are two substrings on L1:L3 and L3:L7.  The parser gives us a
    # single concatenated string, but sets lineno to 3 instead of 7.  We don't
    # have much to go on to figure out that the real end_lineno is 7.  If we
    # don't find the string ending on L3, then search forward looking for the
    # real end of the string.  Yuck!
    #
    # This is now complicated by fstrings that do interpolate variables on
    # 3.7 (fixed on 3.8+), where we'll try to guess based on prefix.
    f_string_candidate_prefixes = []
    for end_lineno in range(first_end_lineno, text.endpos.lineno+1):
        # Compute possible end positions.  We're given the line we're ending
        # on, but not the column position.  Note that the ending line could
        # contain more than just the string we're looking for -- including
        # possibly other strings or comments.
        end_line = text[end_lineno]
        end_line_startcol = (
            text.startpos.colno if end_lineno==text.startpos.lineno else 1)
        endpos_candidates = [
            (_m.group(), FilePos(end_lineno,_m.start()+end_line_startcol+1))
            for _m in re.finditer("[\"\']", end_line)]
        if not endpos_candidates:
            # We found no endpos_candidates.  This should not happen for
            # first_end_lineno because there should be _some_ string that ends
            # there.
            if end_lineno == first_end_lineno:
                raise AssertionError(
                    "No quote char found on line with supposed string")
            continue
        # Filter and sort the possible startpos candidates given this endpos
        # candidate.  It's possible for the starting quotechar and ending
        # quotechar to be different in case of adjacent string concatenation,
        # e.g. "foo"'''bar'''.  That said, it's an unlikely case, so
        # deprioritize checking them.
        likely_candidates = []
        unlikely_candidates = []
        for end_quotechar, endpos in reversed(endpos_candidates):
            for start_quotechar, startpos in startpos_candidates:
                if not startpos < endpos:
                    continue
                if start_quotechar == end_quotechar:
                    candidate_list = likely_candidates
                else:
                    candidate_list = unlikely_candidates
                candidate_list.append((startpos,endpos))
        # Loop over sorted candidates.
        matched_prefix = set()
        for (startpos, endpos) in likely_candidates + unlikely_candidates:
            # Try to parse the given range and see if it matches the target
            # string literal.
            subtext = text[startpos:endpos]
            candidate_str = _test_parse_string_literal(subtext, flags)
            if candidate_str is None:
                continue
            if isinstance(candidate_str, bytes) and sys.version_info[:2] == (3, 7):
                candidate_str = candidate_str.decode()
            # Heuristic: on 3.7 an f-string's literal value differs from the
            # source text, so remember whether the start line looks like it
            # opens an f-string.
            maybe_fstring = False
            try:
                if (3, 7) <= sys.version_info <= (3, 8):
                    potential_start = text.lines[startpos.lineno - 1]
                    maybe_fstring = ("f'" in potential_start) or (
                        'f"' in potential_start
                    )
            except IndexError:
                pass
            if target_str == candidate_str and target_str:
                # Success!
                ast_node.startpos = startpos
                ast_node.endpos = endpos
                # This node is a multiline string; and, it's a leaf, so by
                # definition it is the leftmost node.
                return True # all done
            elif candidate_str and target_str.startswith(candidate_str):
                matched_prefix.add(startpos)
            elif maybe_fstring:
                candidate_prefix = candidate_str.split("{")[0]
                if candidate_prefix and target_str.startswith(candidate_prefix):
                    f_string_candidate_prefixes.append((startpos, endpos))
        # We didn't find a string given the current end_lineno candidate.
        # Only continue checking the startpos candidates that so far produced
        # prefixes of the string we're looking for.
        if not matched_prefix:
            break
        startpos_candidates = [
            (sq, sp)
            for (sq, sp) in startpos_candidates
            if sp in matched_prefix
        ]
    if (3, 7) <= sys.version_info <= (3, 8):
        if len(f_string_candidate_prefixes) == 1:
            # We did not find the string, but there is exactly one f-string
            # candidate that starts it; assume that's the one.
            ast_node.startpos, ast_node.endpos = f_string_candidate_prefixes[0]
            return True
        elif isinstance(parent_ast_node, ast.JoinedStr):
            # Fall back to the position of our preceding sibling within the
            # enclosing f-string.
            self_pos = parent_ast_node.values.index(ast_node)
            ast_node.startpos = parent_ast_node.values[self_pos - 1].startpos
            ast_node.endpos = parent_ast_node.values[self_pos - 1].endpos
            return True
    raise ValueError("Couldn't find exact position of %s" % (ast.dump(ast_node)))
def _annotate_ast_context(ast_node):
    """
    Recursively annotate ``context`` on ast nodes, setting ``context`` to
    a `AstNodeContext` named tuple with values
    ``(parent, field, index)``.
    Each ast_node satisfies ``parent.<field>[<index>] is ast_node``.

    For non-list fields, the index part is ``None``.
    """
    assert isinstance(ast_node, ast.AST)
    for field_name, field_value in ast.iter_fields(ast_node):
        # A field holds either a single node, a list (possibly containing
        # nodes), or a plain value; only AST nodes receive a context.
        if isinstance(field_value, ast.AST):
            field_value.context = AstNodeContext(ast_node, field_name, None)
            _annotate_ast_context(field_value)
        elif isinstance(field_value, list):
            for position, element in enumerate(field_value):
                if isinstance(element, ast.AST):
                    element.context = AstNodeContext(ast_node, field_name, position)
                    _annotate_ast_context(element)
def _split_code_lines(ast_nodes, text):
    """
    Split the given ``ast_nodes`` and corresponding ``text`` by code/noncode
    statement.

    Yield tuples of (nodes, subtext).  ``nodes`` is a list of ``ast.AST``
    nodes, length 0 or 1; ``subtext`` is a `FileText` sliced from ``text``.
    ``nodes`` is ``[node]`` for code lines and ``[]`` for non-code lines
    (comments and blanks).

    :type ast_nodes:
      sequence of ``ast.AST`` nodes
    :type text:
      `FileText`
    """
    if not ast_nodes:
        yield ([], text)
        return
    assert text.startpos <= ast_nodes[0].startpos
    assert ast_nodes[-1].startpos < text.endpos
    if text.startpos != ast_nodes[0].startpos:
        # Starting noncode lines.
        yield ([], text[text.startpos:ast_nodes[0].startpos])
    # Sentinel node whose startpos marks the end of the text, so that the
    # last real node is processed by the same pairwise loop.
    end_sentinel = _DummyAst_Node()
    end_sentinel.startpos = text.endpos
    for node, next_node in zip(ast_nodes, ast_nodes[1:] + [end_sentinel]):
        startpos = node.startpos
        next_startpos = next_node.startpos
        assert startpos < next_startpos
        # We have the start position of this node.  Figure out the end
        # position, excluding noncode lines (standalone comments and blank
        # lines).
        if hasattr(node, 'endpos'):
            # We have an endpos for the node because this was a multi-line
            # string.  Start with the node endpos.
            endpos = node.endpos
            assert startpos < endpos <= next_startpos
            # endpos points to the character *after* the ending quote, so we
            # know that this is never at the beginning of the line.
            assert endpos.colno != 1
            # Advance past whitespace and an inline comment, if any.  Do NOT
            # advance past other code that could be on the same line, nor past
            # blank lines and comments on subsequent lines.
            line = text[endpos : min(text.endpos, FilePos(endpos.lineno+1,1))]
            if _is_comment_or_blank(line):
                endpos = FilePos(endpos.lineno+1, 1)
        else:
            endpos = next_startpos
            assert endpos <= text.endpos
            # We don't have an endpos yet; what we do have is the next node's
            # startpos (or the position at the end of the text).  Start there
            # and work backward.
            if endpos.colno != 1:
                if endpos == text.endpos:
                    # There could be a comment on the last line and no
                    # trailing newline.
                    # TODO: do this in a more principled way.
                    if _is_comment_or_blank(text[endpos.lineno]):
                        assert startpos.lineno < endpos.lineno
                        if not text[endpos.lineno-1].endswith("\\"):
                            endpos = FilePos(endpos.lineno,1)
                else:
                    # We're not at end of file, yet the next node starts in
                    # the middle of the line.  This should only happen with if
                    # we're not looking at a comment.  [The first character in
                    # the line could still be "#" if we're inside a multiline
                    # string that's the last child of the parent node.
                    # Therefore we don't assert 'not
                    # _is_comment_or_blank(...)'.]
                    pass
            if endpos.colno == 1:
                # Walk backward over trailing comment/blank lines so they are
                # yielded as a separate noncode chunk, taking care not to step
                # across a backslash line-continuation.
                while (endpos.lineno-1 > startpos.lineno and
                       _is_comment_or_blank(text[endpos.lineno-1]) and
                       (not text[endpos.lineno-2].endswith("\\") or
                        _is_comment_or_blank(text[endpos.lineno-2]))):
                    endpos = FilePos(endpos.lineno-1, 1)
        assert startpos < endpos <= next_startpos
        yield ([node], text[startpos:endpos])
        if endpos != next_startpos:
            # Noncode lines between this node and the next.
            yield ([], text[endpos:next_startpos])
def _ast_node_is_in_docstring_position(ast_node):
    """
    Given a ``Str`` AST node, return whether its position within the AST makes
    it eligible as a docstring.

    The main way a ``Str`` can be a docstring is if it is a standalone string
    at the beginning of a ``Module``, ``FunctionDef``, or ``ClassDef``.

    We also support variable docstrings per Epydoc:
      - If a variable assignment statement is immediately followed by a bare
        string literal, then that assignment is treated as a docstring for
        that variable.

    :type ast_node:
      ``ast.Str``
    :param ast_node:
      AST node that has been annotated by ``_annotate_ast_nodes``.
    :rtype:
      ``bool``
    :return:
      Whether this string ast node is in docstring position.
    """
    if not isinstance(ast_node, (ast.Str, Bytes)):
        raise TypeError
    # The string itself must be wrapped directly in an Expr statement.
    expr_node = ast_node.context.parent
    if not isinstance(expr_node, ast.Expr):
        return False
    assert ast_node.context.field == 'value'
    assert ast_node.context.index is None
    # The Expr must live in the 'body' of a module, function, or class.
    expr_ctx = expr_node.context
    if expr_ctx.field != 'body':
        return False
    parent_node = expr_ctx.parent
    if not isinstance(parent_node, (ast.FunctionDef, ast.ClassDef, ast.Module)):
        return False
    # First statement in the body: a conventional docstring.
    if expr_ctx.index == 0:
        return True
    # Otherwise it's a variable docstring only when it immediately follows an
    # assignment statement.
    prev_sibling_node = parent_node.body[expr_ctx.index-1]
    if isinstance(prev_sibling_node, ast.Assign):
        return True
    return False
def infer_compile_mode(arg):
    """
    Infer the mode needed to compile ``arg``.

    :type arg:
      ``ast.AST``
    :rtype:
      ``str``
    :raise TypeError:
      If ``arg`` is not a Module, Expression, or Interactive node.
    """
    # Map the three top-level AST wrapper types to their compile() modes.
    for node_type, mode in [(ast.Module, "exec"),
                            (ast.Expression, "eval"),
                            (ast.Interactive, "single")]:
        if isinstance(arg, node_type):
            return mode
    raise TypeError(
        "Expected Module/Expression/Interactive ast node; got %s"
        % (type(arg).__name__))
class _DummyAst_Node(object):
    # Minimal stand-in for an AST node; used only to carry a ``startpos``
    # attribute (e.g. as the end-of-text sentinel in _split_code_lines).
    pass
class PythonStatement(object):
    r"""
    Representation of a top-level Python statement or consecutive
    comments/blank lines.

      >>> PythonStatement('print("x",\n file=None)\n', flags='print_function') #doctest: +SKIP
      PythonStatement('print("x",\n file=None)\n', flags=0x10000)

    Implemented as a wrapper around a `PythonBlock` containing at most one
    top-level AST node.
    """

    def __new__(cls, arg, filename=None, startpos=None, flags=None):
        # Idempotent constructor: return ``arg`` itself when it is already a
        # PythonStatement and no overrides were given.
        if isinstance(arg, cls):
            if filename is startpos is flags is None:
                return arg
            arg = arg.block
            # Fall through
        if isinstance(arg, (PythonBlock, FileText, str, six.text_type)):
            # Parse into a block, then require exactly one statement in it.
            block = PythonBlock(arg, filename=filename,
                                startpos=startpos, flags=flags)
            statements = block.statements
            if len(statements) != 1:
                raise ValueError(
                    "Code contains %d statements instead of exactly 1: %r"
                    % (len(statements), block))
            statement, = statements
            assert isinstance(statement, cls)
            return statement
        raise TypeError("PythonStatement: unexpected %s" % (type(arg).__name__,))

    @classmethod
    def _construct_from_block(cls, block):
        # Only to be used by PythonBlock.  Bypasses parsing/validation.
        assert isinstance(block, PythonBlock)
        self = object.__new__(cls)
        self.block = block
        return self

    @property
    def text(self):
        """
        :rtype:
          `FileText`
        """
        return self.block.text

    @property
    def filename(self):
        """
        :rtype:
          `Filename`
        """
        return self.text.filename

    @property
    def startpos(self):
        """
        :rtype:
          `FilePos`
        """
        return self.text.startpos

    @property
    def flags(self):
        """
        :rtype:
          `CompilerFlags`
        """
        return self.block.flags

    @property
    def ast_node(self):
        """
        A single AST node representing this statement, or ``None`` if this
        object only represents comments/blanks.

        :rtype:
          ``ast.AST`` or ``NoneType``
        """
        ast_nodes = self.block.ast_node.body
        if len(ast_nodes) == 0:
            return None
        if len(ast_nodes) == 1:
            return ast_nodes[0]
        raise AssertionError("More than one AST node in block")

    @property
    def is_comment_or_blank(self):
        # True when this statement wraps only comments/blank lines (no AST).
        return self.ast_node is None

    @property
    def is_comment_or_blank_or_string_literal(self):
        # True for comments/blanks and for bare string-literal statements
        # (e.g. docstrings).
        return (self.is_comment_or_blank
                or _ast_str_literal_value(self.ast_node) is not None)

    @property
    def is_import(self):
        # True when the statement is ``import ...`` or ``from ... import``.
        return isinstance(self.ast_node, (ast.Import, ast.ImportFrom))

    @property
    def is_single_assign(self):
        # True when the statement assigns to exactly one target.
        n = self.ast_node
        return isinstance(n, ast.Assign) and len(n.targets) == 1

    def get_assignment_literal_value(self):
        """
        If the statement is an assignment, return the name and literal value.

          >>> PythonStatement('foo = {1: {2: 3}}').get_assignment_literal_value()
          ('foo', {1: {2: 3}})

        :return:
          (target, literal_value)
        :raise ValueError:
          If the statement is not an assignment to a single name.
        """
        if not self.is_single_assign:
            raise ValueError(
                "Statement is not an assignment to a single name: %s" % self)
        n = self.ast_node
        target_name = n.targets[0].id
        literal_value = ast.literal_eval(n.value)
        return (target_name, literal_value)

    def __repr__(self):
        # Reuse PythonBlock's repr, swapping in this class's name.
        r = repr(self.block)
        assert r.startswith("PythonBlock(")
        r = "PythonStatement(" + r[12:]
        return r

    def __eq__(self, other):
        # Equality delegates to the underlying block.
        if self is other:
            return True
        if not isinstance(other, PythonStatement):
            return NotImplemented
        return self.block == other.block

    def __ne__(self, other):
        return not (self == other)

    # The rest are defined by total_ordering
    # NOTE(review): this class does not appear to be decorated with
    # @functools.total_ordering (only PythonBlock below is), so only
    # __lt__/__eq__ are available here; confirm whether the decorator was
    # intended.
    def __lt__(self, other):
        if not isinstance(other, PythonStatement):
            return NotImplemented
        return self.block < other.block

    def __cmp__(self, other):
        # Python 2 three-way comparison; delegates to the block.
        if self is other:
            return 0
        if not isinstance(other, PythonStatement):
            return NotImplemented
        return cmp(self.block, other.block)

    def __hash__(self):
        return hash(self.block)
@total_ordering
class PythonBlock(object):
r"""
Representation of a sequence of consecutive top-level
`PythonStatement` (s).
>>> source_code = '# 1\nprint(2)\n# 3\n# 4\nprint(5)\nx=[6,\n 7]\n# 8\n'
>>> codeblock = PythonBlock(source_code)
>>> for stmt in PythonBlock(codeblock).statements:
... print(stmt)
PythonStatement('# 1\n')
PythonStatement('print(2)\n', startpos=(2,1))
PythonStatement('# 3\n# 4\n', startpos=(3,1))
PythonStatement('print(5)\n', startpos=(5,1))
PythonStatement('x=[6,\n 7]\n', startpos=(6,1))
PythonStatement('# 8\n', startpos=(8,1))
A ``PythonBlock`` has a ``flags`` attribute that gives the compiler_flags
associated with the __future__ features using which the code should be
parsed.
"""
    def __new__(cls, arg, filename=None, startpos=None, flags=None,
                auto_flags=None):
        # Unwrap a PythonStatement to its underlying block first.
        if isinstance(arg, PythonStatement):
            arg = arg.block
            # Fall through
        # Idempotent constructor: return ``arg`` itself when it is already a
        # PythonBlock and no overrides were given; otherwise rebuild from its
        # text with merged flags.
        if isinstance(arg, cls):
            if filename is startpos is flags is None:
                return arg
            flags = CompilerFlags(flags, arg.flags)
            arg = arg.text
            # Fall through
        if isinstance(arg, (FileText, Filename, str, six.text_type)):
            return cls.from_text(
                arg, filename=filename, startpos=startpos,
                flags=flags, auto_flags=auto_flags)
        raise TypeError("%s: unexpected %s"
                        % (cls.__name__, type(arg).__name__,))
    @classmethod
    def from_filename(cls, filename):
        # Convenience constructor: build a block from a file on disk.
        return cls.from_text(Filename(filename))
    @classmethod
    def from_text(cls, text, filename=None, startpos=None, flags=None,
                  auto_flags=False):
        """
        Construct a `PythonBlock` from source text.  The text is not parsed
        here; parsing happens lazily when AST-derived attributes are accessed.

        :type text:
          `FileText` or convertible
        :type filename:
          ``Filename``
        :param filename:
          Filename, if not already given by ``text``.
        :type startpos:
          ``FilePos``
        :param startpos:
          Starting position, if not already given by ``text``.
        :type flags:
          ``CompilerFlags``
        :param flags:
          Input compiler flags.
        :param auto_flags:
          Whether to try other flags if ``flags`` fails.
        :rtype:
          `PythonBlock`
        """
        text = FileText(text, filename=filename, startpos=startpos)
        self = object.__new__(cls)
        self.text = text
        self._input_flags = CompilerFlags(flags)
        self._auto_flags = auto_flags
        return self
@classmethod
def __construct_from_annotated_ast(cls, annotated_ast_nodes, text, flags):
# Constructor for internal use by _split_by_statement() or
# concatenate().
ast_node = ast.Module(annotated_ast_nodes)
ast_node.text = text
ast_node.flags = flags
if not hasattr(ast_node, "source_flags"):
ast_node.source_flags = CompilerFlags.from_ast(annotated_ast_nodes)
self = object.__new__(cls)
self._ast_node_or_parse_exception = ast_node
self.ast_node = ast_node
self.annotated_ast_node = ast_node
self.text = text
self.flags = self._input_flags = flags
self._auto_flags = False
return self
@classmethod
def concatenate(cls, blocks, assume_contiguous=False):
"""
Concatenate a bunch of blocks into one block.
:type blocks:
sequence of `PythonBlock` s and/or `PythonStatement` s
:param assume_contiguous:
Whether to assume, without checking, that the input blocks were
originally all contiguous. This must be set to True to indicate the
caller understands the assumption; False is not implemented.
"""
if not assume_contiguous:
raise NotImplementedError
blocks = [PythonBlock(b) for b in blocks]
if len(blocks) == 1:
return blocks[0]
assert blocks
text = FileText.concatenate([b.text for b in blocks])
# The contiguous assumption is important here because ``ast_node``
# contains line information that would otherwise be wrong.
ast_nodes = [n for b in blocks for n in b.annotated_ast_node.body]
flags = blocks[0].flags
return cls.__construct_from_annotated_ast(ast_nodes, text, flags)
@property
def filename(self):
return self.text.filename
@property
def startpos(self):
return self.text.startpos
@property
def endpos(self):
return self.text.endpos
@cached_attribute
def _ast_node_or_parse_exception(self):
"""
Attempt to parse this block of code into an abstract syntax tree.
Cached (including exception case).
:return:
Either ast_node or exception.
"""
# This attribute may also be set by __construct_from_annotated_ast(),
# in which case this code does not run.
try:
return _parse_ast_nodes(
self.text, self._input_flags, self._auto_flags, "exec")
except Exception as e:
# Add the filename to the exception message to be nicer.
if self.text.filename:
try:
e = type(e)("While parsing %s: %s" % (self.text.filename, e))
except TypeError:
# Exception takes more than one argument
pass
# Cache the exception to avoid re-attempting while debugging.
return e
@cached_attribute
def parsable(self):
"""
Whether the contents of this ``PythonBlock`` are parsable as Python
code, using the given flags.
:rtype:
``bool``
"""
return isinstance(self._ast_node_or_parse_exception, ast.AST)
@cached_attribute
def parsable_as_expression(self):
"""
Whether the contents of this ``PythonBlock`` are parsable as a single
Python expression, using the given flags.
:rtype:
``bool``
"""
return self.parsable and self.expression_ast_node is not None
@cached_attribute
def ast_node(self):
"""
Parse this block of code into an abstract syntax tree.
The returned object type is the kind of AST as returned by the
``compile`` built-in (rather than as returned by the older, deprecated
``compiler`` module). The code is parsed using mode="exec".
The result is a ``ast.Module`` node, even if this block represents only
a subset of the entire file.
:rtype:
``ast.Module``
"""
r = self._ast_node_or_parse_exception
if isinstance(r, ast.AST):
return r
else:
raise r
@cached_attribute
def annotated_ast_node(self):
"""
Return ``self.ast_node``, annotated in place with positions.
All nodes are annotated with ``startpos``.
All top-level nodes are annotated with ``endpos``.
:rtype:
``ast.Module``
"""
result = self.ast_node
_annotate_ast_nodes(result)
return result
@cached_attribute
def expression_ast_node(self):
"""
Return an ``ast.Expression`` if ``self.ast_node`` can be converted into
one. I.e., return parse(self.text, mode="eval"), if possible.
Otherwise, return ``None``.
:rtype:
``ast.Expression``
"""
node = self.ast_node
if len(node.body) == 1 and isinstance(node.body[0], ast.Expr):
return ast.Expression(node.body[0].value)
else:
return None
def parse(self, mode=None):
"""
Parse the source text into an AST.
:param mode:
Compilation mode: "exec", "single", or "eval". "exec", "single",
and "eval" work as the built-in ``compile`` function do. If ``None``,
then default to "eval" if the input is a string with a single
expression, else "exec".
:rtype:
``ast.AST``
"""
if mode == "exec":
return self.ast_node
elif mode == "eval":
if self.expression_ast_node:
return self.expression_ast_node
else:
raise SyntaxError
elif mode == None:
if self.expression_ast_node:
return self.expression_ast_node
else:
return self.ast_node
elif mode == "exec":
raise NotImplementedError
else:
raise ValueError("parse(): invalid mode=%r" % (mode,))
def compile(self, mode=None):
"""
Parse into AST and compile AST into code.
:rtype:
``CodeType``
"""
ast_node = self.parse(mode=mode)
mode = infer_compile_mode(ast_node)
filename = str(self.filename or "<unknown>")
return compile(ast_node, filename, mode)
@cached_attribute
def statements(self):
r"""
Partition of this ``PythonBlock`` into individual ``PythonStatement`` s.
Each one contains at most 1 top-level ast node. A ``PythonStatement``
can contain no ast node to represent comments.
>>> code = "# multiline\n# comment\n'''multiline\nstring'''\nblah\n"
>>> print(PythonBlock(code).statements) # doctest:+NORMALIZE_WHITESPACE
(PythonStatement('# multiline\n# comment\n'),
PythonStatement("'''multiline\nstring'''\n", startpos=(3,1)),
PythonStatement('blah\n', startpos=(5,1)))
:rtype:
``tuple`` of `PythonStatement` s
"""
node = self.annotated_ast_node
nodes_subtexts = list(_split_code_lines(node.body, self.text))
if nodes_subtexts == [(self.ast_node.body, self.text)]:
# This block is either all comments/blanks or a single statement
# with no surrounding whitespace/comment lines. Return self.
return (PythonStatement._construct_from_block(self),)
cls = type(self)
statement_blocks = [
cls.__construct_from_annotated_ast(subnodes, subtext, self.flags)
for subnodes, subtext in nodes_subtexts]
# Convert to statements.
statements = []
for b in statement_blocks:
statement = PythonStatement._construct_from_block(b)
statements.append(statement)
# Optimization: set the new sub-block's ``statements`` attribute
# since we already know it contains exactly one statement, itself.
assert 'statements' not in b.__dict__
b.statements = (statement,)
return tuple(statements)
@cached_attribute
def source_flags(self):
"""
If the AST contains __future__ imports, then the compiler_flags
associated with them. Otherwise, 0.
The difference between ``source_flags`` and ``flags`` is that ``flags``
may be set by the caller (e.g. based on an earlier __future__ import)
and include automatically guessed flags, whereas ``source_flags`` is
only nonzero if this code itself contains __future__ imports.
:rtype:
`CompilerFlags`
"""
return self.ast_node.source_flags
@cached_attribute
def flags(self):
"""
The compiler flags for this code block, including both the input flags
(possibly automatically guessed), and the flags from "__future__"
imports in the source code text.
:rtype:
`CompilerFlags`
"""
return self.ast_node.flags
def groupby(self, predicate):
"""
Partition this block of code into smaller blocks of code which
consecutively have the same ``predicate``.
:param predicate:
Function that takes a `PythonStatement` and returns a value.
:return:
Generator that yields (group, `PythonBlock` s).
"""
cls = type(self)
for pred, stmts in groupby(self.statements, predicate):
blocks = [s.block for s in stmts]
yield pred, cls.concatenate(blocks, assume_contiguous=True)
def string_literals(self):
r"""
Yield all string literals anywhere in this block.
The string literals have ``startpos`` attributes attached.
>>> block = PythonBlock("'a' + ('b' + \n'c')")
>>> [(f.s, f.startpos) for f in block.string_literals()]
[('a', FilePos(1,1)), ('b', FilePos(1,8)), ('c', FilePos(2,1))]
:return:
Iterable of ``ast.Str`` or ``ast.Bytes`` nodes
"""
for node in _walk_ast_nodes_in_order(self.annotated_ast_node):
if isinstance(node, (ast.Str, Bytes)):
assert hasattr(node, 'startpos')
yield node
def _get_docstring_nodes(self):
"""
Yield docstring AST nodes.
We consider the following to be docstrings::
- First literal string of function definitions, class definitions,
and modules (the python standard)
- Literal strings after assignments, per Epydoc
:rtype:
Generator of ``ast.Str`` nodes
"""
# This is similar to ``ast.get_docstring``, but:
# - This function is recursive
# - This function yields the node object, rather than the string
# - This function yields multiple docstrings (even per ast node)
# - This function doesn't raise TypeError on other AST types
# - This function doesn't cleandoc
# A previous implementation did
# [n for n in self.string_literals()
# if _ast_node_is_in_docstring_position(n)]
# However, the method we now use is more straightforward, and doesn't
# require first annotating each node with context information.
docstring_containers = (ast.FunctionDef, ast.ClassDef, ast.Module)
for node in _walk_ast_nodes_in_order(self.annotated_ast_node):
if not isinstance(node, docstring_containers):
continue
if not node.body:
continue
# If the first body item is a literal string, then yield the node.
if (isinstance(node.body[0], ast.Expr) and
isinstance(node.body[0].value, ast.Str)):
yield node.body[0].value
for i in range(1, len(node.body)-1):
# If a body item is an assignment and the next one is a
# literal string, then yield the node for the literal string.
n1, n2 = node.body[i], node.body[i+1]
if (isinstance(n1, ast.Assign) and
isinstance(n2, ast.Expr) and
isinstance(n2.value, ast.Str)):
yield n2.value
def get_doctests(self):
r"""
Return doctests in this code.
>>> PythonBlock("x\n'''\n >>> foo(bar\n ... + baz)\n'''\n").get_doctests()
[PythonBlock('foo(bar\n + baz)\n', startpos=(3,2))]
:rtype:
``list`` of `PythonStatement` s
"""
parser = IgnoreOptionsDocTestParser()
doctest_blocks = []
filename = self.filename
flags = self.flags
for ast_node in self._get_docstring_nodes():
try:
examples = parser.get_examples(ast_node.s)
except Exception:
blob = ast_node.s
if len(blob) > 60:
blob = blob[:60] + '...'
# TODO: let caller decide how to handle
logger.warning("Can't parse docstring; ignoring: %r", blob)
continue
for example in examples:
lineno = ast_node.startpos.lineno + example.lineno
colno = ast_node.startpos.colno + example.indent # dubious
text = FileText(example.source, filename=filename,
startpos=(lineno,colno))
try:
block = PythonBlock(text, flags=flags)
block.ast_node # make sure we can parse
except Exception:
blob = text.joined
if len(blob) > 60:
blob = blob[:60] + '...'
logger.warning("Can't parse doctest; ignoring: %r", blob)
continue
doctest_blocks.append(block)
return doctest_blocks
def __repr__(self):
r = "%s(%r" % (type(self).__name__, self.text.joined)
if self.filename:
r += ", filename=%r" % (str(self.filename),)
if self.startpos != FilePos():
r += ", startpos=%s" % (self.startpos,)
if self.flags != self.source_flags:
r += ", flags=%s" % (self.flags,)
r += ")"
return r
def __str__(self):
return str(self.text)
def __text__(self):
return self.text
def __eq__(self, other):
if self is other:
return True
if not isinstance(other, PythonBlock):
return NotImplemented
return self.text == other.text and self.flags == other.flags
def __ne__(self, other):
return not (self == other)
# The rest are defined by total_ordering
def __lt__(self, other):
if not isinstance(other, PythonBlock):
return NotImplemented
return (self.text, self.flags) < (other.text, other.flags)
def __cmp__(self, other):
if self is other:
return 0
if not isinstance(other, PythonBlock):
return NotImplemented
return cmp(self.text, other.text) or cmp(self.flags, other.flags)
def __hash__(self):
h = hash((self.text, self.flags))
self.__hash__ = lambda: h
return h
class IgnoreOptionsDocTestParser(DocTestParser):
    """
    A ``DocTestParser`` that discards all doctest option directives.

    pyflyby only cares about the example source code, and the stock parser
    raises an error when it encounters an option name it does not recognize;
    returning an empty option mapping sidesteps both concerns.
    """
    def _find_options(self, source, name, lineno):
        # Skip option parsing entirely: we never consult doctest options, and
        # the base implementation errors out on unknown option names.
        return {}