1482 lines
55 KiB
Python
1482 lines
55 KiB
Python
# pyflyby/_parse.py.
|
|
# Copyright (C) 2011, 2012, 2013, 2014, 2015, 2018 Karl Chen.
|
|
# License: MIT http://opensource.org/licenses/MIT
|
|
|
|
from __future__ import (absolute_import, division, print_function,
|
|
with_statement)
|
|
|
|
import ast
|
|
from collections import namedtuple
|
|
from doctest import DocTestParser
|
|
from functools import total_ordering
|
|
from itertools import groupby
|
|
import re
|
|
import sys
|
|
from textwrap import dedent
|
|
import types
|
|
|
|
import six
|
|
from six import PY2, PY3, text_type as unicode
|
|
from six.moves import range
|
|
|
|
from pyflyby._file import FilePos, FileText, Filename
|
|
from pyflyby._flags import CompilerFlags
|
|
from pyflyby._log import logger
|
|
from pyflyby._util import cached_attribute, cmp
|
|
|
|
# Compatibility shim: make a ``Bytes`` name available on every supported
# Python so the rest of the module can test ``isinstance(x, (ast.Str, Bytes))``.
if PY3:
    from ast import Bytes
else:
    # No separate import on this branch; ``Bytes`` is simply an alias of
    # ``ast.Str``.
    Bytes = ast.Str
|
|
|
|
|
|
# Compatibility shim: make a ``TypeIgnore`` name available on every supported
# Python so ``isinstance(x, TypeIgnore)`` checks below never fail to resolve.
if sys.version_info >= (3, 8):
    from ast import TypeIgnore
else:
    # TypeIgnore does not exist on Python 3.7 and before.
    # Thus we define a dummy TypeIgnore just to simplify the remaining code.

    class TypeIgnore:
        pass
|
|
|
|
|
|
def _is_comment_or_blank(line):
|
|
"""
|
|
Returns whether a line of python code contains only a comment is blank.
|
|
|
|
>>> _is_comment_or_blank("foo\\n")
|
|
False
|
|
|
|
>>> _is_comment_or_blank(" # blah\\n")
|
|
True
|
|
"""
|
|
return re.sub("#.*", "", line).rstrip() == ""
|
|
|
|
|
|
def _ast_str_literal_value(node):
    """
    Return the value of a string/bytes literal node, or ``None``.

    Accepts either the literal node itself or an ``ast.Expr`` statement whose
    value is such a literal.

    :type node:
      ``ast.AST``
    :return:
      The literal's ``s`` attribute, or ``None`` if ``node`` is neither a
      string/bytes literal nor an expression statement wrapping one.
    """
    literal_types = (ast.Str, Bytes)
    # Look through a bare expression statement so both shapes share one check.
    candidate = node.value if isinstance(node, ast.Expr) else node
    if isinstance(candidate, literal_types):
        return candidate.s
    return None
|
|
|
|
|
|
def _flatten_ast_nodes(arg):
|
|
if arg is None:
|
|
pass
|
|
elif isinstance(arg, ast.AST):
|
|
yield arg
|
|
elif isinstance(arg, str):
|
|
#FunctionDef type_comments
|
|
yield arg
|
|
elif isinstance(arg, (tuple, list, types.GeneratorType)):
|
|
for x in arg:
|
|
for y in _flatten_ast_nodes(x):
|
|
yield y
|
|
else:
|
|
raise TypeError(
|
|
"_flatten_ast_nodes: unexpected %s" % (type(arg).__name__,))
|
|
|
|
|
|
def _iter_child_nodes_in_order(node):
    """
    Iterate over the direct children of ``node`` in source order.

    "Children" means every field that is a node plus every item of fields
    that are lists of nodes.

    ``ast.iter_child_nodes`` yields the same nodes, but not in source order:
    e.g. for ``Dict`` s, it yields all key nodes before all value nodes.
    """
    grouped_children = _iter_child_nodes_in_order_internal_1(node)
    return _flatten_ast_nodes(grouped_children)
|
|
|
|
|
|
def _iter_child_nodes_in_order_internal_1(node):
|
|
if isinstance(node, str):
|
|
# this happen for type comments which are not ast nodes but str
|
|
# they do not have children. We yield nothing.
|
|
yield []
|
|
return
|
|
if not isinstance(node, ast.AST):
|
|
raise TypeError
|
|
if isinstance(node, ast.Dict):
|
|
assert node._fields == ("keys", "values")
|
|
yield list(zip(node.keys, node.values))
|
|
elif isinstance(node, ast.FunctionDef):
|
|
if six.PY2:
|
|
assert node._fields == ('name', 'args', 'body', 'decorator_list'), node._fields
|
|
yield node.decorator_list, node.args, node.body
|
|
elif sys.version_info >= (3, 8):
|
|
assert node._fields == (
|
|
"name",
|
|
"args",
|
|
"body",
|
|
"decorator_list",
|
|
"returns",
|
|
"type_comment",
|
|
), node._fields
|
|
res = (
|
|
node.type_comment,
|
|
node.decorator_list,
|
|
node.args,
|
|
node.returns,
|
|
node.body,
|
|
)
|
|
yield res
|
|
else:
|
|
assert node._fields == ('name', 'args', 'body', 'decorator_list',
|
|
'returns'), node._fields
|
|
yield node.decorator_list, node.args, node.returns, node.body
|
|
# node.name is a string, not an AST node
|
|
elif isinstance(node, ast.arguments):
|
|
if six.PY2:
|
|
assert node._fields == ('args', 'vararg', 'kwarg', 'defaults'), node._fields
|
|
elif sys.version_info >= (3, 8):
|
|
assert node._fields == ('posonlyargs', 'args', 'vararg', 'kwonlyargs',
|
|
'kw_defaults', 'kwarg', 'defaults'), node._fields
|
|
else:
|
|
assert node._fields == ('args', 'vararg', 'kwonlyargs',
|
|
'kw_defaults', 'kwarg', 'defaults'), node._fields
|
|
defaults = node.defaults or ()
|
|
num_no_default = len(node.args)-len(defaults)
|
|
yield node.args[:num_no_default]
|
|
yield list(zip(node.args[num_no_default:], defaults))
|
|
# node.varags and node.kwarg are strings, not AST nodes.
|
|
elif isinstance(node, ast.IfExp):
|
|
assert node._fields == ('test', 'body', 'orelse')
|
|
yield node.body, node.test, node.orelse
|
|
elif isinstance(node, ast.Call):
|
|
# call arguments order are lost by ast, re-order them
|
|
yield node.func
|
|
args = sorted([(k.value.lineno, k.value.col_offset, k) for k in node.keywords]+
|
|
[(k.lineno,k.col_offset, k) for k in node.args])
|
|
yield [a[2] for a in args]
|
|
elif isinstance(node, ast.ClassDef):
|
|
if six.PY2:
|
|
assert node._fields == ('name', 'bases', 'body', 'decorator_list')
|
|
else:
|
|
assert node._fields == ('name', 'bases', 'keywords', 'body', 'decorator_list')
|
|
yield node.decorator_list, node.bases, node.body
|
|
# node.name is a string, not an AST node
|
|
elif sys.version_info >= (3, 7) and isinstance(node, ast.FormattedValue):
|
|
assert node._fields == ('value', 'conversion', 'format_spec')
|
|
yield node.value,
|
|
else:
|
|
# Default behavior.
|
|
yield ast.iter_child_nodes(node)
|
|
|
|
|
|
def _walk_ast_nodes_in_order(node):
    """
    Generate ``node`` and all of its descendants, in the order in which they
    appear in the source.

    ``ast.walk`` visits the same nodes, but in an arbitrary order.
    """
    # Same idea as ``ast.walk``, with two differences:
    #   1. A stack (depth-first) replaces the deque (breadth-first).
    #   2. Children come from _iter_child_nodes_in_order rather than
    #      ``ast.iter_child_nodes``.
    pending = [node]
    while pending:
        current = pending.pop()
        yield current
        # Push children reversed so that the first child is popped next.
        children = list(_iter_child_nodes_in_order(current))
        children.reverse()
        pending.extend(children)
|
|
|
|
|
|
def _flags_to_try(source, flags, auto_flags, mode):
    """
    Generate the compiler flags to attempt when parsing ``source``.

    - On Python 3.8+: yield ``flags`` once, with ``type_comments`` added if
      ``source`` contains something matching ``# *type:``.
    - Otherwise, if ``auto_flags`` is False or we are on Python 3: yield
      only ``flags``.
    - Otherwise, in "eval" mode: yield ``flags`` once, with
      ``print_function`` added if ``source`` mentions ``print``.
    - Otherwise: yield ``flags``, and then ``flags ^ print_function`` if
      ``source`` mentions ``print``.
    """
    base_flags = CompilerFlags(flags)
    if sys.version_info >= (3, 8):
        if re.search(r"# *type:", source):
            base_flags = base_flags | CompilerFlags('type_comments')
        yield base_flags
        return
    if not auto_flags or PY3:
        yield base_flags
        return
    mentions_print = re.search(r"\bprint\b", source)
    if mode == "eval":
        # Only a single attempt in eval mode.
        if mentions_print:
            base_flags = base_flags | CompilerFlags("print_function")
        yield base_flags
        return
    yield base_flags
    if mentions_print:
        # Second attempt with the print_function flag toggled.
        yield base_flags ^ CompilerFlags("print_function")
|
|
|
|
|
|
def _parse_ast_nodes(text, flags, auto_flags, mode):
    """
    Compile a block of lines into an AST.

    The returned node is additionally given the attributes ``input_flags``,
    ``source_flags``, ``flags`` (the union of the former two) and ``text``.

    :type text:
      ``FileText``
    :type flags:
      ``CompilerFlags``
    :type auto_flags:
      ``bool``
    :param auto_flags:
      Whether to guess different flags if ``text`` can't be parsed with
      ``flags``.
    :param mode:
      Compilation mode: "exec", "single", or "eval".
    :rtype:
      ``ast.Module``
    :raise SyntaxError:
      None of the attempted flag sets produced a successful parse; the
      error from the last attempt is raised.
    """
    text = FileText(text)
    filename = str(text.filename) if text.filename else "<unknown>"
    source = dedent(text.joined)
    if PY2 and isinstance(source, unicode):
        source = source.encode('utf-8')
    if not source.endswith("\n"):
        # Ensure that the last line ends with a newline (``ast`` barfs
        # otherwise).
        source += "\n"
    last_error = None
    for candidate_flags in _flags_to_try(source, flags, auto_flags, mode):
        cflags = ast.PyCF_ONLY_AST | int(candidate_flags)
        try:
            tree = compile(
                source, filename, mode, flags=cflags, dont_inherit=1)
        except SyntaxError as e:
            # Remember the failure and move on to the next flag set.
            last_error = e
            continue
        # Attach flags to the result.
        tree.input_flags = candidate_flags
        tree.source_flags = CompilerFlags.from_ast(tree)
        tree.flags = tree.input_flags | tree.source_flags
        tree.text = text
        return tree
    raise last_error # SyntaxError
|
|
|
|
|
|
def _test_parse_string_literal(text, flags):
    r"""
    Try to parse ``text`` as an expression.  Return the value of the string
    literal if ``text`` parses cleanly to exactly one, else ``None``.

      >>> _test_parse_string_literal(r'"foo\n" r"\nbar"', None)
      'foo\n\\nbar'

    """
    text = FileText(text)
    if PY2:
        try:
            text.joined.encode('ascii')
        except UnicodeError:
            # Non-ascii source: prepend an encoding declaration.
            text = FileText(u'# encoding: utf-8\n' + unicode(text), filename=text.filename)

    try:
        parsed = _parse_ast_nodes(text, flags, False, "eval")
    except SyntaxError:
        return None
    expr = parsed.body
    return expr.s if isinstance(expr, (ast.Str, Bytes)) else None
|
|
|
|
|
|
# Location of an AST node relative to its parent: the node is reachable as
# ``parent.<field>[<index>]``, with ``index`` being ``None`` for non-list
# fields.
AstNodeContext = namedtuple("AstNodeContext", "parent field index")
|
|
|
|
|
|
def _annotate_ast_nodes(ast_node):
    """
    Add position annotations to an AST by running `_annotate_ast_startpos`
    over it, starting at ``ast_node.text.startpos``.

    Annotating ``context`` (as `AstNodeContext`) is disabled for now.

    :type ast_node:
      ``ast.AST``
    :param ast_node:
      AST node returned by `_parse_ast_nodes`
    :return:
      ``None``
    """
    source_text = ast_node.text
    compiler_flags = ast_node.flags
    _annotate_ast_startpos(
        ast_node, None, source_text.startpos, source_text, compiler_flags)
    # Not used for now:
    #   ast_node.context = AstNodeContext(None, None, None)
    #   _annotate_ast_context(ast_node)
|
|
|
|
|
|
def _annotate_ast_startpos(ast_node, parent_ast_node, minpos, text, flags):
    r"""
    Annotate ``ast_node``.  Set ``ast_node.startpos`` to the starting position
    of the node within ``text``.  For multiline string literals,
    ``ast_node.endpos`` is set as well.  Child nodes are annotated
    recursively before ``ast_node`` itself.

    For "typical" nodes, i.e. those other than multiline strings, this is
    simply FilePos(ast_node.lineno, ast_node.col_offset+1), but taking
    ``text.startpos`` into account.

    For multiline string nodes, this function works by trying to parse all
    possible subranges of lines until finding the range that is syntactically
    valid and matches ``value``.  The candidate range is
    text[min_start_lineno:lineno+text.startpos.lineno+1].

    This function is unfortunately necessary because of a flaw in the output
    produced by the Python built-in parser.  For some crazy reason, the
    ``ast_node.lineno`` attribute represents something different for multiline
    string literals versus all other statements.  For multiline string literal
    nodes and statements that are just a string expression (or more generally,
    nodes where the first descendant leaf node is a multiline string literal),
    the compiler attaches the ending line number as the value of the ``lineno``
    attribute.  For all other AST nodes, the compiler attaches the
    starting line number as the value of the ``lineno`` attribute.  This means
    e.g. the statement "'''foo\nbar'''" has a lineno value of 2, but the
    statement "x='''foo\nbar'''" has a lineno value of 1.

    :type ast_node:
      ``ast.AST``
    :param parent_ast_node:
      Parent of ``ast_node``, or ``None`` for the root.
    :type minpos:
      ``FilePos``
    :param minpos:
      Earliest position to check, in the number space of ``text``.
    :type text:
      ``FileText``
    :param text:
      Source text that was used to parse the AST, whose ``startpos`` should be
      used in interpreting ``ast_node.lineno`` (which always starts at 1 for
      the subset that was parsed).
    :type flags:
      ``CompilerFlags``
    :param flags:
      Compiler flags to use when re-compiling code.
    :return:
      ``True`` if this node is a multiline string literal or the first child is
      such a node (recursively); ``False`` otherwise.
    :raise ValueError:
      Could not find the starting line number.
    :raise AssertionError:
      Child nodes were visited out of source order, or no quote character was
      found on the line where the string supposedly ends.
    """
    assert isinstance(ast_node, (ast.AST, str, TypeIgnore)), ast_node

    # Joined strings and their children do not carry a column offset on
    # pre-3.8; this prevents reformatting.
    # Set the column offset to the parent value before 3.8.
    if (3, 7) < sys.version_info < (3, 8):
        if (
            isinstance(ast_node, (getattr(ast, "JoinedStr", None), ast.FormattedValue))
            or isinstance(
                parent_ast_node, (getattr(ast, "JoinedStr", None), ast.FormattedValue)
            )
        ) and ast_node.col_offset == -1:
            ast_node.col_offset = parent_ast_node.col_offset

    # First, traverse child nodes.  If the first child node (recursively) is a
    # multiline string, then we need to transfer its information to this node.
    # Walk all nodes/fields of the AST.  We implement this as a custom
    # depth-first search instead of using ast.walk() or ast.NodeVisitor
    # so that we can easily keep track of the preceding node's lineno.
    child_minpos = minpos
    is_first_child = True
    leftstr_node = None
    for child_node in _iter_child_nodes_in_order(ast_node):
        leftstr = _annotate_ast_startpos(child_node, ast_node,
                                         child_minpos, text, flags)
        if is_first_child and leftstr:
            leftstr_node = child_node
        if hasattr(child_node, 'lineno') and not isinstance(child_node, TypeIgnore):
            # Each child must start at or after the previous one; anything
            # else means the children were not visited in source order.
            if child_node.startpos < child_minpos:
                raise AssertionError(
                    "Got out-of-order AST node(s):\n"
                    " parent minpos=%s\n" % minpos +
                    " node: %s\n" % ast.dump(ast_node) +
                    " fields: %s\n" % (" ".join(ast_node._fields)) +
                    " children:\n" +
                    ''.join(
                        " %s %9s: %s\n" % (
                            ("==>" if cn is child_node else " "),
                            getattr(cn, 'startpos', ""),
                            ast.dump(cn))
                        for cn in _iter_child_nodes_in_order(ast_node)) +
                    "\n"
                    "This indicates a bug in pyflyby._\n"
                    "\n"
                    "pyflyby developer: Check if there's a bug or missing ast node handler in "
                    "pyflyby._parse._iter_child_nodes_in_order() - "
                    "probably the handler for ast.%s." % type(ast_node).__name__)
            child_minpos = child_node.startpos
        is_first_child = False

    # If the node has no lineno at all, then skip it.  This should only happen
    # for nodes we don't care about, e.g. ``ast.Module`` or ``ast.alias``.
    if not hasattr(ast_node, 'lineno') or isinstance(ast_node, TypeIgnore):
        return False
    # If col_offset is set then the lineno should be correct also.
    if ast_node.col_offset >= 0:
        # In Python 3.8+, FunctionDef.lineno is the line with the def. To
        # account for decorators, we need the lineno of the first decorator
        if (sys.version_info >= (3, 8)
            and isinstance(ast_node, (ast.FunctionDef, ast.ClassDef))
            and ast_node.decorator_list):
            delta = (ast_node.decorator_list[0].lineno-1,
                     # The col_offset doesn't include the @
                     ast_node.decorator_list[0].col_offset - 1)
        else:
            delta = (ast_node.lineno-1, ast_node.col_offset)

        # Not a multiline string literal.  (I.e., it could be a non-string or
        # a single-line string.)
        # Easy.
        startpos = text.startpos + delta

        # Special case for 'with' statements.  Consider the code:
        #    with X: pass
        #    ^0   ^5
        # In python2.6, col_offset is 0.
        # In python2.7, col_offset is 5.
        # This is because python2.7 allows for multiple clauses:
        #    with X, Y: pass
        # Since 'Y's col_offset isn't the beginning of the line, the authors
        # of Python presumably changed 'X's col_offset to also not be the
        # beginning of the line.  If they had made the With ast node support
        # multiple clauses, they wouldn't have needed to do that, but then
        # that would introduce an API change in the AST.  So it's
        # understandable that they did that.
        # Since we use startpos for breaking lines, we need to set startpos to
        # the beginning of the line.
        # In Python 3, the col_offset for the with is 0 again.
        if (isinstance(ast_node, ast.With) and
            not isinstance(parent_ast_node, ast.With) and
            sys.version_info[:2] == (2,7)):
            assert ast_node.col_offset >= 5
            if startpos.lineno == text.startpos.lineno:
                linestart = text.startpos.colno
            else:
                linestart = 1
            line = text[(startpos.lineno,linestart):startpos]
            m = re.search(r"\bwith\s+$", str(line))
            assert m
            lk = len(m.group()) # length of 'with ' including spaces
            startpos = FilePos(startpos.lineno, startpos.colno - lk)
            assert str(text[startpos:(startpos+(0,4))]) == "with"
        ast_node.startpos = startpos
        # NOTE(review): ``sys.version_info`` is a 5-tuple, so ``<= (3, 8)`` is
        # only true for versions before 3.8.0, i.e. this clamps on pre-3.8.
        if sys.version_info <= (3, 8):
            ast_node.startpos = max(startpos, minpos)
        return False

    assert ast_node.col_offset == -1
    if leftstr_node:
        # This is an ast node where the leftmost deepest leaf is a
        # multiline string.  The bug that multiline strings have broken
        # lineno/col_offset infects ancestors up the tree.
        #
        # If the leftmost leaf is a multi-line string, then ``lineno``
        # contains the ending line number, and col_offset is -1:
        #   >>> ast.parse("""'''foo\nbar'''+blah""").body[0].lineno
        #   2
        # But if the leftmost leaf is not a multi-line string, then
        # ``lineno`` contains the starting line number:
        #   >>> ast.parse("""'''foobar'''+blah""").body[0].lineno
        #   1
        #   >>> ast.parse("""blah+'''foo\nbar'''+blah""").body[0].lineno
        #   1
        #
        # To fix that, we copy start_lineno and start_colno from the Str
        # node once we've corrected the values.
        assert not isinstance(ast_node, (ast.Str, Bytes))
        assert leftstr_node.lineno == ast_node.lineno
        assert leftstr_node.col_offset == -1
        ast_node.startpos = leftstr_node.startpos
        return True

    # It should now be the case that we are looking at a multi-line string
    # literal.
    if sys.version_info >= (3, 7) and isinstance(ast_node, ast.FormattedValue):
        ast_node.startpos = ast_node.value.startpos
        # NOTE(review): endpos is set to the value's *startpos*, not its
        # endpos -- confirm this is intended.
        ast_node.endpos = ast_node.value.startpos

        return True
    if not isinstance(ast_node, (ast.Str, Bytes)):
        raise ValueError(
            "got a non-string col_offset=-1: %s" % (ast.dump(ast_node)))
    # The ``lineno`` attribute gives the ending line number of the multiline
    # string ... unless it's multiple multiline strings that are concatenated
    # by adjacency, in which case it's merely the end of the first one of
    # them.  At least we know that the start lineno is definitely not later
    # than the ``lineno`` attribute.
    first_end_lineno = text.startpos.lineno + ast_node.lineno - 1
    # Compute possible start positions.
    # The starting line number of this string could be anywhere between the
    # end of the previous expression and ``first_end_lineno``.
    startpos_candidates = []
    assert minpos.lineno <= first_end_lineno
    for start_lineno in range(minpos.lineno, first_end_lineno + 1):
        start_line = text[start_lineno]
        start_line_colno = (text.startpos.colno
                            if start_lineno==text.startpos.lineno else 1)
        # Each candidate is (quote character, position of the start of the
        # optional string prefix + quote).
        startpos_candidates.extend([
            (_m.group()[-1], FilePos(start_lineno, _m.start()+start_line_colno))
            for _m in re.finditer("[bBrRuU]*[\"\']", start_line)])
    target_str = ast_node.s

    if isinstance(target_str, bytes) and sys.version_info[:2] == (3, 7):
        target_str = target_str.decode()

    # Loop over possible end_linenos.  The first one we've identified is the
    # by far most likely one, but in theory it could be anywhere later in the
    # file.  This could be because of a dastardly concatenated string like
    # this:
    #   """       # L1
    #   two       # L2
    #   """   """ # L3
    #   four      # L4
    #   five      # L5
    #   six       # L6
    #   """       # L7
    # There are two substrings on L1:L3 and L3:L7.  The parser gives us a
    # single concatenated string, but sets lineno to 3 instead of 7.  We don't
    # have much to go on to figure out that the real end_lineno is 7.  If we
    # don't find the string ending on L3, then search forward looking for the
    # real end of the string.  Yuck!
    #
    # This is now complicated by fstrings that do interpolate variables on 3.7
    # (fixed on 3.8+), where we'll try to guess based on prefix.
    f_string_candidate_prefixes = []
    for end_lineno in range(first_end_lineno, text.endpos.lineno+1):
        # Compute possible end positions.  We're given the line we're ending
        # on, but not the column position.  Note that the ending line could
        # contain more than just the string we're looking for -- including
        # possibly other strings or comments.
        end_line = text[end_lineno]
        end_line_startcol = (
            text.startpos.colno if end_lineno==text.startpos.lineno else 1)
        # Each candidate is (quote character, position just after the quote).
        endpos_candidates = [
            (_m.group(), FilePos(end_lineno,_m.start()+end_line_startcol+1))
            for _m in re.finditer("[\"\']", end_line)]
        if not endpos_candidates:
            # We found no endpos_candidates.  This should not happen for
            # first_end_lineno because there should be _some_ string that ends
            # there.
            if end_lineno == first_end_lineno:
                raise AssertionError(
                    "No quote char found on line with supposed string")
            continue
        # Filter and sort the possible startpos candidates given this endpos
        # candidate.  It's possible for the starting quotechar and ending
        # quotechar to be different in case of adjacent string concatenation,
        # e.g.  "foo"'''bar'''.  That said, it's an unlikely case, so
        # deprioritize checking them.
        likely_candidates = []
        unlikely_candidates = []
        for end_quotechar, endpos in reversed(endpos_candidates):
            for start_quotechar, startpos in startpos_candidates:
                if not startpos < endpos:
                    continue
                if start_quotechar == end_quotechar:
                    candidate_list = likely_candidates
                else:
                    candidate_list = unlikely_candidates
                candidate_list.append((startpos,endpos))
        # Loop over sorted candidates.
        matched_prefix = set()
        for (startpos, endpos) in likely_candidates + unlikely_candidates:
            # Try to parse the given range and see if it matches the target
            # string literal.
            subtext = text[startpos:endpos]
            candidate_str = _test_parse_string_literal(subtext, flags)
            if candidate_str is None:
                continue
            if isinstance(candidate_str, bytes) and sys.version_info[:2] == (3, 7):
                candidate_str = candidate_str.decode()

            maybe_fstring = False
            try:
                # NOTE(review): as a 5-tuple comparison this range only
                # matches 3.7.x (3.8.0 itself compares greater than (3, 8)).
                if (3, 7) <= sys.version_info <= (3, 8):
                    potential_start = text.lines[startpos.lineno - 1]
                    maybe_fstring = ("f'" in potential_start) or (
                        'f"' in potential_start
                    )
            except IndexError:
                pass

            if target_str == candidate_str and target_str:
                # Success!
                ast_node.startpos = startpos
                ast_node.endpos = endpos
                # This node is a multiline string; and, it's a leaf, so by
                # definition it is the leftmost node.
                return True # all done
            elif candidate_str and target_str.startswith(candidate_str):
                matched_prefix.add(startpos)
            elif maybe_fstring:
                # Compare only the literal text before the first "{".
                candidate_prefix = candidate_str.split("{")[0]
                if candidate_prefix and target_str.startswith(candidate_prefix):
                    f_string_candidate_prefixes.append((startpos, endpos))
        # We didn't find a string given the current end_lineno candidate.
        # Only continue checking the startpos candidates that so far produced
        # prefixes of the string we're looking for.
        if not matched_prefix:
            break
        startpos_candidates = [
            (sq, sp)
            for (sq, sp) in startpos_candidates
            if sp in matched_prefix
        ]
    if (3, 7) <= sys.version_info <= (3, 8):
        if len(f_string_candidate_prefixes) == 1:
            # we did not find the string but there is one fstring candidate starting it

            ast_node.startpos, ast_node.endpos = f_string_candidate_prefixes[0]
            return True
        elif isinstance(parent_ast_node, ast.JoinedStr):
            # Fall back to the position of the preceding value of the
            # enclosing joined string.
            # NOTE(review): if this node is the first value, ``self_pos - 1``
            # is -1 and selects the *last* sibling -- confirm this is intended.
            self_pos = parent_ast_node.values.index(ast_node)
            ast_node.startpos = parent_ast_node.values[self_pos - 1].startpos
            ast_node.endpos = parent_ast_node.values[self_pos - 1].endpos
            return True
    raise ValueError("Couldn't find exact position of %s" % (ast.dump(ast_node)))
|
|
|
|
|
|
def _annotate_ast_context(ast_node):
    """
    Recursively set ``context`` on every descendant of ``ast_node``.

    ``context`` is a `AstNodeContext` named tuple ``(parent, field, index)``
    such that ``parent.<field>[<index>] is child``.  For non-list fields,
    the index part is ``None``.
    """
    assert isinstance(ast_node, ast.AST)
    for field_name, field_value in ast.iter_fields(ast_node):
        # Normalize both field shapes to a sequence of (index, child) pairs.
        if isinstance(field_value, ast.AST):
            indexed_children = [(None, field_value)]
        elif isinstance(field_value, list):
            indexed_children = enumerate(field_value)
        else:
            continue
        for index, child_node in indexed_children:
            if not isinstance(child_node, ast.AST):
                continue
            child_node.context = AstNodeContext(ast_node, field_name, index)
            _annotate_ast_context(child_node)
|
|
|
|
|
|
def _split_code_lines(ast_nodes, text):
    """
    Split the given ``ast_nodes`` and corresponding ``text`` by code/noncode
    statement.

    Yield tuples of (nodes, subtext).  ``nodes`` is a list of ``ast.AST`` nodes,
    length 0 or 1; ``subtext`` is a `FileText` sliced from ``text``.

    The yielded tuples are ``([node], FileText(...))`` for code lines and
    ``([], FileText(...))`` for non-code lines (comments and blanks).

    :type ast_nodes:
      sequence of ``ast.AST`` nodes
    :param ast_nodes:
      Nodes carrying a ``startpos`` attribute (and, for multiline strings, an
      ``endpos`` attribute).
    :type text:
      `FileText`
    """
    if not ast_nodes:
        # No code at all: the whole text is a single non-code chunk.
        yield ([], text)
        return
    assert text.startpos <= ast_nodes[0].startpos
    assert ast_nodes[-1].startpos < text.endpos
    if text.startpos != ast_nodes[0].startpos:
        # Starting noncode lines.
        yield ([], text[text.startpos:ast_nodes[0].startpos])
    # Sentinel "node" positioned at the end of the text, so that the last real
    # node also has a successor to pair with.
    end_sentinel = _DummyAst_Node()
    end_sentinel.startpos = text.endpos
    for node, next_node in zip(ast_nodes, ast_nodes[1:] + [end_sentinel]):
        startpos = node.startpos
        next_startpos = next_node.startpos
        assert startpos < next_startpos
        # We have the start position of this node.  Figure out the end
        # position, excluding noncode lines (standalone comments and blank
        # lines).
        if hasattr(node, 'endpos'):
            # We have an endpos for the node because this was a multi-line
            # string.  Start with the node endpos.
            endpos = node.endpos
            assert startpos < endpos <= next_startpos
            # endpos points to the character *after* the ending quote, so we
            # know that this is never at the beginning of the line.
            assert endpos.colno != 1
            # Advance past whitespace and an inline comment, if any.  Do NOT
            # advance past other code that could be on the same line, nor past
            # blank lines and comments on subsequent lines.
            line = text[endpos : min(text.endpos, FilePos(endpos.lineno+1,1))]
            if _is_comment_or_blank(line):
                endpos = FilePos(endpos.lineno+1, 1)
        else:
            endpos = next_startpos
            assert endpos <= text.endpos
            # We don't have an endpos yet; what we do have is the next node's
            # startpos (or the position at the end of the text).  Start there
            # and work backward.
            if endpos.colno != 1:
                if endpos == text.endpos:
                    # There could be a comment on the last line and no
                    # trailing newline.
                    # TODO: do this in a more principled way.
                    if _is_comment_or_blank(text[endpos.lineno]):
                        assert startpos.lineno < endpos.lineno
                        if not text[endpos.lineno-1].endswith("\\"):
                            endpos = FilePos(endpos.lineno,1)
                else:
                    # We're not at end of file, yet the next node starts in
                    # the middle of the line.  This should only happen if
                    # we're not looking at a comment.  [The first character in
                    # the line could still be "#" if we're inside a multiline
                    # string that's the last child of the parent node.
                    # Therefore we don't assert 'not
                    # _is_comment_or_blank(...)'.]
                    pass
            if endpos.colno == 1:
                # Walk backward over trailing comment/blank lines, stopping at
                # a line that is continued by a backslash on the line before.
                while (endpos.lineno-1 > startpos.lineno and
                       _is_comment_or_blank(text[endpos.lineno-1]) and
                       (not text[endpos.lineno-2].endswith("\\") or
                        _is_comment_or_blank(text[endpos.lineno-2]))):
                    endpos = FilePos(endpos.lineno-1, 1)
        assert startpos < endpos <= next_startpos
        yield ([node], text[startpos:endpos])
        if endpos != next_startpos:
            # Non-code lines between this node and the next one.
            yield ([], text[endpos:next_startpos])
|
|
|
|
|
|
def _ast_node_is_in_docstring_position(ast_node):
    """
    Decide whether a ``Str`` AST node sits where a docstring may appear.

    A ``Str`` qualifies if it is a standalone string statement at the
    beginning of the body of a ``Module``, ``FunctionDef``, or ``ClassDef``.

    Variable docstrings per Epydoc are supported as well:

      - A bare string literal statement that immediately follows a variable
        assignment statement is treated as a docstring for that variable.

    :type ast_node:
      ``ast.Str``
    :param ast_node:
      AST node whose ``context`` attribute (and that of its parent) has been
      set.
    :rtype:
      ``bool``
    :return:
      Whether this string ast node is in docstring position.
    :raise TypeError:
      ``ast_node`` is not a string/bytes literal node.
    """
    if not isinstance(ast_node, (ast.Str, Bytes)):
        raise TypeError
    str_ctx = ast_node.context
    statement = str_ctx.parent
    # The string must be a whole expression statement by itself.
    if not isinstance(statement, ast.Expr):
        return False
    assert str_ctx.field == 'value'
    assert str_ctx.index is None
    stmt_ctx = statement.context
    container = stmt_ctx.parent
    in_scope_body = (
        stmt_ctx.field == 'body'
        and isinstance(container, (ast.FunctionDef, ast.ClassDef, ast.Module)))
    if not in_scope_body:
        return False
    position = stmt_ctx.index
    if position == 0:
        # First statement of the body: a regular docstring.
        return True
    # Otherwise only a variable docstring, directly after an assignment.
    return isinstance(container.body[position-1], ast.Assign)
|
|
|
|
|
|
def infer_compile_mode(arg):
    """
    Determine which ``compile`` mode corresponds to the root node ``arg``.

    :type arg:
      ``ast.AST``
    :rtype:
      ``str``
    :return:
      "exec" for ``ast.Module``, "eval" for ``ast.Expression``, "single" for
      ``ast.Interactive``.
    :raise TypeError:
      ``arg`` is none of those node types.
    """
    # Checked in this order; the first matching root type decides the mode.
    mode_by_root_type = (
        (ast.Module, "exec"),
        (ast.Expression, "eval"),
        (ast.Interactive, "single"),
    )
    for root_type, mode in mode_by_root_type:
        if isinstance(arg, root_type):
            return mode
    raise TypeError(
        "Expected Module/Expression/Interactive ast node; got %s"
        % (type(arg).__name__))
|
|
|
|
|
|
# Empty attribute holder standing in for an AST node; ``_split_code_lines``
# instantiates it and sets ``startpos`` on it to mark the end of the text.
class _DummyAst_Node(object):
    pass
|
|
|
|
|
|
class PythonStatement(object):
|
|
r"""
|
|
Representation of a top-level Python statement or consecutive
|
|
comments/blank lines.
|
|
|
|
>>> PythonStatement('print("x",\n file=None)\n', flags='print_function') #doctest: +SKIP
|
|
PythonStatement('print("x",\n file=None)\n', flags=0x10000)
|
|
|
|
Implemented as a wrapper around a `PythonBlock` containing at most one
|
|
top-level AST node.
|
|
"""
|
|
|
|
def __new__(cls, arg, filename=None, startpos=None, flags=None):
|
|
if isinstance(arg, cls):
|
|
if filename is startpos is flags is None:
|
|
return arg
|
|
arg = arg.block
|
|
# Fall through
|
|
if isinstance(arg, (PythonBlock, FileText, str, six.text_type)):
|
|
block = PythonBlock(arg, filename=filename,
|
|
startpos=startpos, flags=flags)
|
|
statements = block.statements
|
|
if len(statements) != 1:
|
|
raise ValueError(
|
|
"Code contains %d statements instead of exactly 1: %r"
|
|
% (len(statements), block))
|
|
statement, = statements
|
|
assert isinstance(statement, cls)
|
|
return statement
|
|
raise TypeError("PythonStatement: unexpected %s" % (type(arg).__name__,))
|
|
|
|
@classmethod
|
|
def _construct_from_block(cls, block):
|
|
# Only to be used by PythonBlock.
|
|
assert isinstance(block, PythonBlock)
|
|
self = object.__new__(cls)
|
|
self.block = block
|
|
return self
|
|
|
|
@property
|
|
def text(self):
|
|
"""
|
|
:rtype:
|
|
`FileText`
|
|
"""
|
|
return self.block.text
|
|
|
|
@property
|
|
def filename(self):
|
|
"""
|
|
:rtype:
|
|
`Filename`
|
|
"""
|
|
return self.text.filename
|
|
|
|
@property
|
|
def startpos(self):
|
|
"""
|
|
:rtype:
|
|
`FilePos`
|
|
"""
|
|
return self.text.startpos
|
|
|
|
@property
|
|
def flags(self):
|
|
"""
|
|
:rtype:
|
|
`CompilerFlags`
|
|
"""
|
|
return self.block.flags
|
|
|
|
@property
|
|
def ast_node(self):
|
|
"""
|
|
A single AST node representing this statement, or ``None`` if this
|
|
object only represents comments/blanks.
|
|
|
|
:rtype:
|
|
``ast.AST`` or ``NoneType``
|
|
"""
|
|
ast_nodes = self.block.ast_node.body
|
|
if len(ast_nodes) == 0:
|
|
return None
|
|
if len(ast_nodes) == 1:
|
|
return ast_nodes[0]
|
|
raise AssertionError("More than one AST node in block")
|
|
|
|
@property
|
|
def is_comment_or_blank(self):
    """
    Whether this statement has no AST node (only comments/blank lines).

    :rtype:
      ``bool``
    """
    node = self.ast_node
    return node is None
|
|
|
|
@property
|
|
def is_comment_or_blank_or_string_literal(self):
    """
    Whether this statement is comment/blank-only, or is a bare
    string/bytes literal as recognized by ``_ast_str_literal_value``.

    :rtype:
      ``bool``
    """
    if self.is_comment_or_blank:
        return True
    literal = _ast_str_literal_value(self.ast_node)
    return literal is not None
|
|
|
|
@property
|
|
def is_import(self):
    """
    Whether this statement is an ``import ...`` or ``from ... import ...``.

    :rtype:
      ``bool``
    """
    import_node_types = (ast.Import, ast.ImportFrom)
    node = self.ast_node
    return isinstance(node, import_node_types)
|
|
|
|
@property
|
|
def is_single_assign(self):
    """
    Whether this statement is an ``ast.Assign`` with exactly one target.

    :rtype:
      ``bool``
    """
    node = self.ast_node
    if not isinstance(node, ast.Assign):
        return False
    return len(node.targets) == 1
|
|
|
|
def get_assignment_literal_value(self):
    """
    If the statement is an assignment, return the name and literal value.

      >>> PythonStatement('foo = {1: {2: 3}}').get_assignment_literal_value()
      ('foo', {1: {2: 3}})

    :return:
      (target, literal_value)
    :raise ValueError:
      The statement is not an assignment to a single plain name.
      ``ast.literal_eval`` may additionally raise if the right-hand side
      is not a literal.
    """
    not_single_name = (
        "Statement is not an assignment to a single name: %s")
    if not self.is_single_assign:
        raise ValueError(not_single_name % self)
    node = self.ast_node
    target = node.targets[0]
    # ``is_single_assign`` only guarantees there is one target; that target
    # can still be an attribute, subscript or tuple, none of which has
    # ``.id``.  Report those the same way as any other non-matching
    # statement.
    if not isinstance(target, ast.Name):
        raise ValueError(not_single_name % self)
    return (target.id, ast.literal_eval(node.value))
|
|
|
|
def __repr__(self):
    """
    Same as the wrapped block's repr, with the leading class name
    replaced by ``PythonStatement``.
    """
    block_prefix = "PythonBlock("
    block_repr = repr(self.block)
    assert block_repr.startswith(block_prefix)
    return "PythonStatement(" + block_repr[len(block_prefix):]
|
|
|
|
def __eq__(self, other):
    """
    Two statements are equal when their wrapped blocks are equal.
    Returns ``NotImplemented`` for non-`PythonStatement` operands.
    """
    if self is not other:
        if isinstance(other, PythonStatement):
            return self.block == other.block
        return NotImplemented
    return True
|
|
|
|
def __ne__(self, other):
    """
    Negation of ``==``.
    """
    are_equal = (self == other)
    return not are_equal
|
|
|
|
# The rest are defined by total_ordering
|
|
def __lt__(self, other):
    """
    Order statements by their wrapped blocks.
    Returns ``NotImplemented`` for non-`PythonStatement` operands.
    """
    if isinstance(other, PythonStatement):
        return self.block < other.block
    return NotImplemented
|
|
|
|
def __cmp__(self, other):
    """
    Three-way comparison by wrapped block; ``0`` for the identical object.
    Returns ``NotImplemented`` for non-`PythonStatement` operands.
    """
    if self is not other:
        if isinstance(other, PythonStatement):
            return cmp(self.block, other.block)
        return NotImplemented
    return 0
|
|
|
|
def __hash__(self):
    """
    Hash of the wrapped block, consistent with ``__eq__``.
    """
    wrapped = self.block
    return hash(wrapped)
|
|
|
|
|
|
@total_ordering
class PythonBlock(object):
    r"""
    Representation of a sequence of consecutive top-level
    `PythonStatement` (s).

      >>> source_code = '# 1\nprint(2)\n# 3\n# 4\nprint(5)\nx=[6,\n 7]\n# 8\n'
      >>> codeblock = PythonBlock(source_code)
      >>> for stmt in PythonBlock(codeblock).statements:
      ...     print(stmt)
      PythonStatement('# 1\n')
      PythonStatement('print(2)\n', startpos=(2,1))
      PythonStatement('# 3\n# 4\n', startpos=(3,1))
      PythonStatement('print(5)\n', startpos=(5,1))
      PythonStatement('x=[6,\n 7]\n', startpos=(6,1))
      PythonStatement('# 8\n', startpos=(8,1))

    A ``PythonBlock`` has a ``flags`` attribute that gives the compiler_flags
    associated with the __future__ features using which the code should be
    parsed.

    """

    def __new__(cls, arg, filename=None, startpos=None, flags=None,
                auto_flags=None):
        """
        Build a block from a `PythonStatement`, another `PythonBlock`, a
        `FileText`, a `Filename`, or a string of source code.

        An existing block is returned unchanged when ``filename``,
        ``startpos`` and ``flags`` are all ``None``.

        :raise TypeError:
          ``arg`` has an unsupported type.
        """
        if isinstance(arg, PythonStatement):
            arg = arg.block
            # Fall through
        if isinstance(arg, cls):
            if filename is startpos is flags is None:
                return arg
            # Combine the requested flags with those of the existing block,
            # then re-create the block from its text.
            flags = CompilerFlags(flags, arg.flags)
            arg = arg.text
            # Fall through
        if isinstance(arg, (FileText, Filename, str, six.text_type)):
            return cls.from_text(
                arg, filename=filename, startpos=startpos,
                flags=flags, auto_flags=auto_flags)
        raise TypeError("%s: unexpected %s"
                        % (cls.__name__, type(arg).__name__,))

    @classmethod
    def from_filename(cls, filename):
        """
        Build a block from the file named ``filename``.

        :rtype:
          `PythonBlock`
        """
        return cls.from_text(Filename(filename))

    @classmethod
    def from_text(cls, text, filename=None, startpos=None, flags=None,
                  auto_flags=False):
        """
        :type text:
          `FileText` or convertible
        :type filename:
          ``Filename``
        :param filename:
          Filename, if not already given by ``text``.
        :type startpos:
          ``FilePos``
        :param startpos:
          Starting position, if not already given by ``text``.
        :type flags:
          ``CompilerFlags``
        :param flags:
          Input compiler flags.
        :param auto_flags:
          Whether to try other flags if ``flags`` fails.
        :rtype:
          `PythonBlock`
        """
        text = FileText(text, filename=filename, startpos=startpos)
        self = object.__new__(cls)
        self.text = text
        self._input_flags = CompilerFlags(flags)
        self._auto_flags = auto_flags
        return self

    @classmethod
    def __construct_from_annotated_ast(cls, annotated_ast_nodes, text, flags):
        # Constructor for internal use by _split_by_statement() or
        # concatenate().
        ast_node = ast.Module(annotated_ast_nodes)
        if "type_ignores" in ast.Module._fields:
            # On Python versions where ``Module`` has a ``type_ignores``
            # field, ``compile()`` rejects a Module that lacks it, so make
            # the hand-built node compilable.
            ast_node.type_ignores = []
        ast_node.text = text
        ast_node.flags = flags
        if not hasattr(ast_node, "source_flags"):
            ast_node.source_flags = CompilerFlags.from_ast(annotated_ast_nodes)
        self = object.__new__(cls)
        # Pre-populate the attributes that would otherwise be computed
        # lazily by parsing ``text``.
        self._ast_node_or_parse_exception = ast_node
        self.ast_node = ast_node
        self.annotated_ast_node = ast_node
        self.text = text
        self.flags = self._input_flags = flags
        self._auto_flags = False
        return self

    @classmethod
    def concatenate(cls, blocks, assume_contiguous=False):
        """
        Concatenate a bunch of blocks into one block.

        :type blocks:
          sequence of `PythonBlock` s and/or `PythonStatement` s
        :param assume_contiguous:
          Whether to assume, without checking, that the input blocks were
          originally all contiguous.  This must be set to True to indicate the
          caller understands the assumption; False is not implemented.
        :raise NotImplementedError:
          ``assume_contiguous`` is false.
        :rtype:
          `PythonBlock`
        """
        if not assume_contiguous:
            raise NotImplementedError
        blocks = [PythonBlock(b) for b in blocks]
        if len(blocks) == 1:
            return blocks[0]
        assert blocks
        text = FileText.concatenate([b.text for b in blocks])
        # The contiguous assumption is important here because ``ast_node``
        # contains line information that would otherwise be wrong.
        ast_nodes = [n for b in blocks for n in b.annotated_ast_node.body]
        flags = blocks[0].flags
        return cls.__construct_from_annotated_ast(ast_nodes, text, flags)

    @property
    def filename(self):
        return self.text.filename

    @property
    def startpos(self):
        return self.text.startpos

    @property
    def endpos(self):
        return self.text.endpos

    @cached_attribute
    def _ast_node_or_parse_exception(self):
        """
        Attempt to parse this block of code into an abstract syntax tree.
        Cached (including exception case).

        :return:
          Either ast_node or exception.
        """
        # This attribute may also be set by __construct_from_annotated_ast(),
        # in which case this code does not run.
        try:
            return _parse_ast_nodes(
                self.text, self._input_flags, self._auto_flags, "exec")
        except Exception as e:
            # Add the filename to the exception message to be nicer.
            if self.text.filename:
                try:
                    e = type(e)("While parsing %s: %s" % (self.text.filename, e))
                except TypeError:
                    # Exception takes more than one argument
                    pass
            # Cache the exception to avoid re-attempting while debugging.
            return e

    @cached_attribute
    def parsable(self):
        """
        Whether the contents of this ``PythonBlock`` are parsable as Python
        code, using the given flags.

        :rtype:
          ``bool``
        """
        return isinstance(self._ast_node_or_parse_exception, ast.AST)

    @cached_attribute
    def parsable_as_expression(self):
        """
        Whether the contents of this ``PythonBlock`` are parsable as a single
        Python expression, using the given flags.

        :rtype:
          ``bool``
        """
        return self.parsable and self.expression_ast_node is not None

    @cached_attribute
    def ast_node(self):
        """
        Parse this block of code into an abstract syntax tree.

        The returned object type is the kind of AST as returned by the
        ``compile`` built-in (rather than as returned by the older, deprecated
        ``compiler`` module).  The code is parsed using mode="exec".

        The result is a ``ast.Module`` node, even if this block represents only
        a subset of the entire file.

        If parsing failed, the cached parse exception is raised.

        :rtype:
          ``ast.Module``
        """
        r = self._ast_node_or_parse_exception
        if isinstance(r, ast.AST):
            return r
        else:
            raise r

    @cached_attribute
    def annotated_ast_node(self):
        """
        Return ``self.ast_node``, annotated in place with positions.

        All nodes are annotated with ``startpos``.
        All top-level nodes are annotated with ``endpos``.

        :rtype:
          ``ast.Module``
        """
        result = self.ast_node
        _annotate_ast_nodes(result)
        return result

    @cached_attribute
    def expression_ast_node(self):
        """
        Return an ``ast.Expression`` if ``self.ast_node`` can be converted into
        one.  I.e., return parse(self.text, mode="eval"), if possible.

        Otherwise, return ``None``.

        :rtype:
          ``ast.Expression``
        """
        node = self.ast_node
        if len(node.body) == 1 and isinstance(node.body[0], ast.Expr):
            return ast.Expression(node.body[0].value)
        else:
            return None

    def parse(self, mode=None):
        """
        Parse the source text into an AST.

        :param mode:
          Compilation mode: "exec", "single", or "eval", named as for the
          built-in ``compile`` function.  "single" is recognized but not
          implemented.  If ``None``, then default to "eval" if the input is
          a single expression, else "exec".
        :raise SyntaxError:
          ``mode`` is "eval" but the code is not a single expression.
        :raise NotImplementedError:
          ``mode`` is "single".
        :raise ValueError:
          ``mode`` is not one of the values above.
        :rtype:
          ``ast.AST``
        """
        if mode == "exec":
            return self.ast_node
        elif mode == "eval":
            if self.expression_ast_node:
                return self.expression_ast_node
            else:
                raise SyntaxError
        elif mode is None:
            if self.expression_ast_node:
                return self.expression_ast_node
            else:
                return self.ast_node
        elif mode == "single":
            # This branch previously repeated the "exec" test and could
            # never be reached.
            raise NotImplementedError
        else:
            raise ValueError("parse(): invalid mode=%r" % (mode,))

    def compile(self, mode=None):
        """
        Parse into AST and compile AST into code.

        :param mode:
          Passed to `parse`; the mode given to the built-in ``compile`` is
          then inferred from the resulting AST node.
        :rtype:
          ``CodeType``
        """
        ast_node = self.parse(mode=mode)
        mode = infer_compile_mode(ast_node)
        filename = str(self.filename or "<unknown>")
        return compile(ast_node, filename, mode)

    @cached_attribute
    def statements(self):
        r"""
        Partition of this ``PythonBlock`` into individual ``PythonStatement`` s.
        Each one contains at most 1 top-level ast node.  A ``PythonStatement``
        can contain no ast node to represent comments.

          >>> code = "# multiline\n# comment\n'''multiline\nstring'''\nblah\n"
          >>> print(PythonBlock(code).statements) # doctest:+NORMALIZE_WHITESPACE
          (PythonStatement('# multiline\n# comment\n'),
           PythonStatement("'''multiline\nstring'''\n", startpos=(3,1)),
           PythonStatement('blah\n', startpos=(5,1)))

        :rtype:
          ``tuple`` of `PythonStatement` s
        """
        node = self.annotated_ast_node
        nodes_subtexts = list(_split_code_lines(node.body, self.text))
        if nodes_subtexts == [(self.ast_node.body, self.text)]:
            # This block is either all comments/blanks or a single statement
            # with no surrounding whitespace/comment lines.  Return self.
            return (PythonStatement._construct_from_block(self),)
        cls = type(self)
        statement_blocks = [
            cls.__construct_from_annotated_ast(subnodes, subtext, self.flags)
            for subnodes, subtext in nodes_subtexts]
        # Convert to statements.
        statements = []
        for b in statement_blocks:
            statement = PythonStatement._construct_from_block(b)
            statements.append(statement)
            # Optimization: set the new sub-block's ``statements`` attribute
            # since we already know it contains exactly one statement, itself.
            assert 'statements' not in b.__dict__
            b.statements = (statement,)
        return tuple(statements)

    @cached_attribute
    def source_flags(self):
        """
        If the AST contains __future__ imports, then the compiler_flags
        associated with them.  Otherwise, 0.

        The difference between ``source_flags`` and ``flags`` is that ``flags``
        may be set by the caller (e.g. based on an earlier __future__ import)
        and include automatically guessed flags, whereas ``source_flags`` is
        only nonzero if this code itself contains __future__ imports.

        :rtype:
          `CompilerFlags`
        """
        return self.ast_node.source_flags

    @cached_attribute
    def flags(self):
        """
        The compiler flags for this code block, including both the input flags
        (possibly automatically guessed), and the flags from "__future__"
        imports in the source code text.

        :rtype:
          `CompilerFlags`
        """
        return self.ast_node.flags

    def groupby(self, predicate):
        """
        Partition this block of code into smaller blocks of code which
        consecutively have the same ``predicate``.

        :param predicate:
          Function that takes a `PythonStatement` and returns a value.
        :return:
          Generator that yields (group, `PythonBlock` s).
        """
        cls = type(self)
        for pred, stmts in groupby(self.statements, predicate):
            blocks = [s.block for s in stmts]
            yield pred, cls.concatenate(blocks, assume_contiguous=True)

    def string_literals(self):
        r"""
        Yield all string literals anywhere in this block.

        The string literals have ``startpos`` attributes attached.

          >>> block = PythonBlock("'a' + ('b' + \n'c')")
          >>> [(f.s, f.startpos) for f in block.string_literals()]
          [('a', FilePos(1,1)), ('b', FilePos(1,8)), ('c', FilePos(2,1))]

        :return:
          Iterable of ``ast.Str`` or ``ast.Bytes`` nodes
        """
        for node in _walk_ast_nodes_in_order(self.annotated_ast_node):
            if isinstance(node, (ast.Str, Bytes)):
                assert hasattr(node, 'startpos')
                yield node

    def _get_docstring_nodes(self):
        """
        Yield docstring AST nodes.

        We consider the following to be docstrings::

          - First literal string of function definitions, class definitions,
            and modules (the python standard)
          - Literal strings after assignments, per Epydoc

        :rtype:
          Generator of ``ast.Str`` nodes
        """
        # This is similar to ``ast.get_docstring``, but:
        #   - This function is recursive
        #   - This function yields the node object, rather than the string
        #   - This function yields multiple docstrings (even per ast node)
        #   - This function doesn't raise TypeError on other AST types
        #   - This function doesn't cleandoc
        # A previous implementation did
        #   [n for n in self.string_literals()
        #    if _ast_node_is_in_docstring_position(n)]
        # However, the method we now use is more straightforward, and doesn't
        # require first annotating each node with context information.
        docstring_containers = (ast.FunctionDef, ast.ClassDef, ast.Module)
        for node in _walk_ast_nodes_in_order(self.annotated_ast_node):
            if not isinstance(node, docstring_containers):
                continue
            if not node.body:
                continue
            # If the first body item is a literal string, then yield the node.
            if (isinstance(node.body[0], ast.Expr) and
                isinstance(node.body[0].value, ast.Str)):
                yield node.body[0].value
            # Examine every adjacent pair, starting with (body[0], body[1]),
            # so that a string following a leading assignment is found too.
            # This cannot re-yield the docstring above: a pair only ever
            # yields its second element, and that is never ``body[0]``.
            for i in range(len(node.body)-1):
                # If a body item is an assignment and the next one is a
                # literal string, then yield the node for the literal string.
                n1, n2 = node.body[i], node.body[i+1]
                if (isinstance(n1, ast.Assign) and
                    isinstance(n2, ast.Expr) and
                    isinstance(n2.value, ast.Str)):
                    yield n2.value

    def get_doctests(self):
        r"""
        Return doctests in this code.

          >>> PythonBlock("x\n'''\n >>> foo(bar\n ...     + baz)\n'''\n").get_doctests()
          [PythonBlock('foo(bar\n    + baz)\n', startpos=(3,2))]

        Docstrings and doctest examples that cannot be parsed are skipped
        with a logged warning.

        :rtype:
          ``list`` of `PythonBlock` s
        """
        parser = IgnoreOptionsDocTestParser()
        doctest_blocks = []
        filename = self.filename
        flags = self.flags
        for ast_node in self._get_docstring_nodes():
            try:
                examples = parser.get_examples(ast_node.s)
            except Exception:
                blob = ast_node.s
                if len(blob) > 60:
                    blob = blob[:60] + '...'
                # TODO: let caller decide how to handle
                logger.warning("Can't parse docstring; ignoring: %r", blob)
                continue
            for example in examples:
                lineno = ast_node.startpos.lineno + example.lineno
                colno = ast_node.startpos.colno + example.indent # dubious
                text = FileText(example.source, filename=filename,
                                startpos=(lineno,colno))
                try:
                    block = PythonBlock(text, flags=flags)
                    block.ast_node # make sure we can parse
                except Exception:
                    blob = text.joined
                    if len(blob) > 60:
                        blob = blob[:60] + '...'
                    logger.warning("Can't parse doctest; ignoring: %r", blob)
                    continue
                doctest_blocks.append(block)
        return doctest_blocks

    def __repr__(self):
        r = "%s(%r" % (type(self).__name__, self.text.joined)
        if self.filename:
            r += ", filename=%r" % (str(self.filename),)
        if self.startpos != FilePos():
            r += ", startpos=%s" % (self.startpos,)
        # Only show flags that are not already implied by the source itself.
        if self.flags != self.source_flags:
            r += ", flags=%s" % (self.flags,)
        r += ")"
        return r

    def __str__(self):
        return str(self.text)

    def __text__(self):
        return self.text

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, PythonBlock):
            return NotImplemented
        return self.text == other.text and self.flags == other.flags

    def __ne__(self, other):
        return not (self == other)

    # The rest are defined by total_ordering
    def __lt__(self, other):
        if not isinstance(other, PythonBlock):
            return NotImplemented
        return (self.text, self.flags) < (other.text, other.flags)

    def __cmp__(self, other):
        if self is other:
            return 0
        if not isinstance(other, PythonBlock):
            return NotImplemented
        return cmp(self.text, other.text) or cmp(self.flags, other.flags)

    def __hash__(self):
        """
        Hash of ``(text, flags)``, computed once per instance.
        """
        # ``hash()`` looks ``__hash__`` up on the type, not the instance, so
        # assigning ``self.__hash__`` would not cache anything.  Keep the
        # computed value in the instance dict instead.
        try:
            return self.__dict__["_cached_hash"]
        except KeyError:
            h = self.__dict__["_cached_hash"] = hash((self.text, self.flags))
            return h
|
|
|
|
class IgnoreOptionsDocTestParser(DocTestParser):
    """
    A ``DocTestParser`` whose option lookup always reports no options.
    """

    def _find_options(self, source, name, lineno):
        # Doctest option directives are not used here.  The default
        # DocTestParser raises an error on options it does not know, which
        # we want to avoid, so report "no options" unconditionally.
        return dict()
|