creeper-adventure/.venv/lib/python3.8/site-packages/pyflyby/_docxref.py
2022-03-31 20:20:07 -05:00

382 lines
14 KiB
Python

# pyflyby/_docxref.py.
# Module for checking Epydoc cross-references.
# Portions of the code below are derived from Epydoc, which is distributed
# under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and any associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the
# following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# The software is provided "as is", without warranty of any kind, express or
# implied, including but not limited to the warranties of merchantability,
# fitness for a particular purpose and noninfringement. In no event shall
# the authors or copyright holders be liable for any claim, damages or other
# liability, whether in an action of contract, tort or otherwise, arising
# from, out of or in connection with the software or the use or other
# dealings in the software.
from __future__ import (absolute_import, division, print_function,
with_statement)
import re
import six
from six.moves import builtins
from textwrap import dedent
from epydoc.apidoc import (ClassDoc, ModuleDoc, PropertyDoc,
RoutineDoc, UNKNOWN, VariableDoc)
from epydoc.docbuilder import build_doc_index
from epydoc.markup.plaintext import ParsedPlaintextDocstring
from pyflyby._file import Filename
from pyflyby._idents import DottedIdentifier
from pyflyby._log import logger
from pyflyby._modules import ModuleHandle
from pyflyby._util import cached_attribute, memoize, prefixes
# Cross-references into these modules are accepted without being checked:
# following them (e.g. into numpy) is too slow to be worth it.
ASSUME_MODULES_OK = {'numpy'}
@memoize
def map_strings_to_line_numbers(module):
    """
    Index the string literals of ``module`` by their dedented text.

    Every literal yielded by ``module.block.string_literals()`` is keyed by
    its dedented, stripped text, because epydoc dedents strings and lookups
    are done with those.  The value keeps the starting line number (1-index)
    together with the original, unmodified text, which is needed to count
    exact line numbers.  If two literals share the same key, the later one
    wins.

    :rtype:
      ``dict`` from ``str`` to (``int``, ``str``)
    """
    return {
        dedent(literal.s).strip(): (literal.startpos.lineno, literal.s)
        for literal in module.block.string_literals()
    }
def get_string_linenos(module, searchstring, within_string):
    """
    Return the line numbers (1-indexed) within ``module`` that contain
    ``searchstring``.  Only string literals are considered (i.e. not
    comments).

    ``searchstring`` is compiled as a regular expression and applied to each
    line of the candidate string literals.

    The literal equal to ``within_string`` (modulo indenting) is tried first;
    if it exists, only that literal is searched.  Only if ``within_string``
    is not found among the module's string literals is every literal
    searched.

    [If there's a comment on the same line as a string that also contains the
    searchstring, we'll get confused.]

    :rtype:
      ``tuple`` of ``int``
    :raise Exception:
      If no line matches.
    """
    module = ModuleHandle(module)
    pattern = re.compile(searchstring)
    literals = map_strings_to_line_numbers(module)

    def matching_linenos(first_lineno, text):
        # Line numbers of the lines of ``text`` that match ``pattern``,
        # given that ``text`` starts at ``first_lineno``.
        return [first_lineno + offset
                for offset, line in enumerate(text.splitlines())
                if pattern.search(line)]

    enclosing = literals.get(within_string.strip())
    if enclosing is not None:
        # We found the larger string exactly within the ast.
        hits = matching_linenos(*enclosing)
        if not hits:
            # We could continue down if this ever happened.
            raise Exception(
                "Found superstring in %r but not substring %r within superstring"
                % (module.filename, searchstring))
        return tuple(hits)

    # Try a full text search.
    hits = []
    for first_lineno, text in literals.values():
        hits.extend(matching_linenos(first_lineno, text))
    if not hits:
        raise Exception(
            "Could not find %r anywhere in %r" % (searchstring, module.filename))
    return tuple(sorted(hits))
def describe_xref(identifier, container):
    """
    Build the description of a cross-reference found in ``container``'s
    docstring.

    :param identifier:
      The cross-referenced name, as written in the docstring.
    :param container:
      The API doc object whose docstring contains the reference.  Its
      ``defining_module``, ``docstring`` and ``canonical_name`` are used.
    :return:
      ``(module, linenos, container_name, identifier)`` where ``linenos`` is
      the tuple of line numbers at which the reference appears.
    """
    module = ModuleHandle(str(container.defining_module.canonical_name))
    # Sanity check: the module we resolved by name is the file epydoc read.
    assert module.filename == Filename(container.defining_module.filename)
    # The identifier is literal text, so escape it before embedding it in the
    # search pattern.  Otherwise "." would match any character and other
    # regex metacharacters in the identifier could mis-match or fail to
    # compile.
    linenos = get_string_linenos(
        module,
        "(L{|<)%s" % (re.escape(identifier),),
        container.docstring)
    return (module, linenos, str(container.canonical_name), identifier)
def safe_build_doc_index(modules):
    """
    Build a fresh epydoc DocIndex for ``modules``.

    Epydoc's caches are cleared first, and a failed build is turned into an
    exception instead of a ``None`` result.

    :raise Exception:
      If ``build_doc_index`` returns ``None``.
    """
    # build_doc_index isn't re-entrant due to crappy caching! >:(
    from epydoc.docintrospecter import clear_cache
    clear_cache()
    from epydoc.docparser import _moduledoc_cache
    _moduledoc_cache.clear()
    # Build a new DocIndex.  It swallows exceptions and returns None on error!
    # >:(
    docindex = build_doc_index(modules)
    if docindex is not None:
        return docindex
    raise Exception("Failed to build doc index on %r" % (modules,))
class ExpandedDocIndex(object):
    """
    A DocIndex wrapper that grows on demand: modules can be added after
    construction, and the underlying DocIndex is rebuilt from the current set
    of modules the next time it is needed.
    """
    # TODO: this is kludgy and inefficient since it re-reads modules.

    def __init__(self, modules):
        self.modules = {ModuleHandle(m) for m in modules}

    def add_module(self, module):
        """
        Add ``module`` to the set of indexed modules and discard the cached
        DocIndex so that it gets rebuilt with the updated set.

        :return:
          Whether anything was added.
        """
        handle = ModuleHandle(module)
        if any(ancestor in self.modules for ancestor in handle.ancestors):
            # The module, or a prefix of it, was already added.
            return False
        for known in sorted(self.modules):
            if not known.startswith(handle):
                continue
            # This supersedes an existing module.
            assert known != handle
            self.modules.remove(known)
        logger.debug("Expanding docindex to include %r", handle)
        self.modules.add(handle)
        # Drop the cached index; it is recomputed on next access.
        del self.docindex
        return True

    def find(self, a, b):
        """
        Forward to ``find`` on the current DocIndex.
        """
        return self.docindex.find(a, b)

    def get_vardoc(self, a):
        """
        Forward to ``get_vardoc`` on the current DocIndex.
        """
        return self.docindex.get_vardoc(a)

    @cached_attribute
    def docindex(self):
        """
        The DocIndex built from the names of the current modules, in sorted
        order.
        """
        names = [str(handle.name) for handle in sorted(self.modules)]
        return safe_build_doc_index(names)
def remove_epydoc_sym_suffix(s):
    """
    Strip the trailing "'" that Epydoc appends to 'shadowed' names, i.e. any
    "'" that is directly followed by a "." or by the end of the string.

      >>> remove_epydoc_sym_suffix("a.b'.c'.d")
      'a.b.c.d'

    """
    # A lookahead leaves the following "." (if any) in place, so only the
    # quote itself is deleted.
    return re.sub(r"'(?=[.]|$)", "", s)
class XrefScanner(object):
    """
    Scans the epydoc documentation of a set of modules and collects the
    docstring cross-references that fail to resolve.

    Call `scan` to run the scan and get the failures.
    """

    def __init__(self, modules):
        self.modules = modules
        self.docindex = safe_build_doc_index(modules)
        # Reset by scan().  Initialized here too so that scan_docstring() can
        # record failures even when it is called outside of scan().
        self._failed_xrefs = []

    @cached_attribute
    def expanded_docindex(self):
        """
        An `ExpandedDocIndex` seeded with the requested modules; it is
        extended with further modules while resolving cross-references.
        """
        return ExpandedDocIndex(self.modules)

    def scan(self):
        """
        Scan all class and module docs reachable from the doc index.

        :return:
          Sorted ``tuple`` of the ``describe_xref`` results for every
          cross-reference that could not be resolved.
        """
        self._failed_xrefs = []
        valdocs = sorted(self.docindex.reachable_valdocs(
            imports=False, packages=False, bases=False, submodules=False,
            subclasses=False, private=True
            ))
        for doc in valdocs:
            if isinstance(doc, ClassDoc):
                self.scan_class(doc)
            elif isinstance(doc, ModuleDoc):
                self.scan_module(doc)
        return tuple(sorted(self._failed_xrefs))

    def scan_module(self, doc):
        """
        Scan a module's description, its submodules (if it is a package),
        and its "function" and "other" variables.
        """
        self.descr(doc)
        if doc.is_package is True:
            for submodule in doc.submodules:
                self.scan_module(submodule)
            # self.scan_module_list(doc)
        self.scan_details_list(doc, "function")
        self.scan_details_list(doc, "other")

    def scan_class(self, doc):
        """
        Scan a class's description and its methods, class variables,
        instance variables and properties.
        """
        self.descr(doc)
        self.scan_details_list(doc, "method")
        self.scan_details_list(doc, "classvariable")
        self.scan_details_list(doc, "instancevariable")
        self.scan_details_list(doc, "property")

    def scan_details_list(self, doc, value_type):
        """
        Scan every non-imported variable of ``doc`` that has the given
        ``value_type``.  For classes, inherited variables are skipped too.
        """
        detailed = True
        if isinstance(doc, ClassDoc):
            var_docs = doc.select_variables(value_type=value_type,
                                            imported=False, inherited=False,
                                            public=None,
                                            detailed=detailed)
        else:
            var_docs = doc.select_variables(value_type=value_type,
                                            imported=False,
                                            public=None,
                                            detailed=detailed)
        for var_doc in var_docs:
            self.scan_details(var_doc)

    def scan_details(self, var_doc):
        """
        Scan the docstring fields attached to a single variable: its
        description, plus the fields that apply to the kind of value it
        holds (routine, property, or anything else).
        """
        self.descr(var_doc)
        if isinstance(var_doc.value, RoutineDoc):
            self.return_type(var_doc)
            self.return_descr(var_doc)
            for (arg_names, arg_descr) in var_doc.value.arg_descrs:
                self.scan_docstring(arg_descr, var_doc.value)
            for arg in var_doc.value.arg_types:
                self.scan_docstring(
                    var_doc.value.arg_types[arg], var_doc.value)
        elif isinstance(var_doc.value, PropertyDoc):
            prop_doc = var_doc.value
            self.return_type(prop_doc.fget)
            self.return_type(prop_doc.fset)
            self.return_type(prop_doc.fdel)
        else:
            self.type_descr(var_doc)

    def _scan_attr(self, attr, api_doc):
        """
        Scan the parsed docstring stored in attribute ``attr`` of
        ``api_doc``.  If a `VariableDoc` has no such docstring itself, fall
        back to the one on its value.
        """
        if api_doc in (None, UNKNOWN):
            return ''
        pds = getattr(api_doc, attr, None) # pds = ParsedDocstring.
        if pds not in (None, UNKNOWN):
            self.scan_docstring(pds, api_doc)
        elif isinstance(api_doc, VariableDoc):
            self._scan_attr(attr, api_doc.value)

    def summary(self, api_doc):
        self._scan_attr('summary', api_doc)

    def descr(self, api_doc):
        self._scan_attr('descr', api_doc)

    def type_descr(self, api_doc):
        self._scan_attr('type_descr', api_doc)

    def return_type(self, api_doc):
        self._scan_attr('return_type', api_doc)

    def return_descr(self, api_doc):
        self._scan_attr('return_descr', api_doc)

    def check_xref(self, identifier, container):
        """
        Check that ``identifier`` cross-references a proper symbol.

        Look in modules that we weren't explicitly asked to look in, if
        needed.

        :param identifier:
          The cross-referenced name as written in the docstring.
        :param container:
          The API doc object whose docstring contains the reference.
        :rtype:
          ``bool``
        """
        if identifier in builtins.__dict__:
            return True

        def check_container():
            # Resolve ``identifier`` in the context of ``container`` using
            # the (possibly expanded) doc index.
            if self.expanded_docindex.find(identifier, container) is not None:
                return True
            if isinstance(container, RoutineDoc):
                tcontainer = self.expanded_docindex.get_vardoc(
                    container.canonical_name)
                doc = self.expanded_docindex.find(identifier, tcontainer)
                # While the name is still unresolved, retry in the context
                # of each routine that this one overrides.  Stop as soon as
                # it is found or the chain of overrides ends.
                while (doc is None and tcontainer not in (None, UNKNOWN)
                       and tcontainer.overrides not in (None, UNKNOWN)):
                    tcontainer = tcontainer.overrides
                    doc = self.expanded_docindex.find(identifier, tcontainer)
                return doc is not None
            return False

        def check_defining_module(x):
            # Add the module that defines ``x`` to the expanded doc index
            # and, if that changed anything, try resolving again.
            if x is None:
                return False
            defining_module_name = remove_epydoc_sym_suffix(str(
                x.defining_module.canonical_name))
            if defining_module_name in ASSUME_MODULES_OK:
                return True
            if self.expanded_docindex.add_module(defining_module_name):
                if check_container():
                    return True
            return False

        if check_container():
            return True
        # A routine's own argument names are valid targets.
        if (isinstance(container, RoutineDoc) and
            identifier in container.all_args()):
            return True
        if check_defining_module(container):
            return True
        # If the user has imported foo.bar.baz as baz and now uses
        # ``baz.quux``, we need to add the module foo.bar.baz.
        for prefix in reversed(list(prefixes(
                DottedIdentifier(remove_epydoc_sym_suffix(identifier))))):
            if check_defining_module(
                    self.docindex.find(str(prefix), container)):
                return True
        # Last resort: treat the identifier as a fully-qualified name and
        # pull in the module that contains it, if there is one.
        try:
            module = ModuleHandle.containing(identifier)
        except ImportError:
            pass
        else:
            if str(module.name) in ASSUME_MODULES_OK:
                return True
            if self.expanded_docindex.add_module(module):
                if check_container():
                    return True
        return False

    def scan_docstring(self, parsed_docstring, container):
        """
        Walk the tree of ``parsed_docstring`` and check every ``link``
        element with `check_xref`.  Failures are appended to
        ``self._failed_xrefs``.

        Missing docstrings and plaintext docstrings are skipped.
        """
        if parsed_docstring in (None, UNKNOWN): return ''
        if isinstance(parsed_docstring, ParsedPlaintextDocstring):
            return ''

        def scan_tree(tree):
            # Returns the text of ``tree``; subtrees whose text is not
            # needed are represented by '?'.
            if isinstance(tree, six.string_types):
                return tree
            variables = [scan_tree(child) for child in tree.children]
            if tree.tag == 'link':
                # The second child of a link is used as the target name.
                identifier = variables[1]
                if not self.check_xref(identifier, container):
                    self._failed_xrefs.append(
                        describe_xref(identifier, container) )
                return '?'
            elif tree.tag == 'indexed':
                return '?'
            elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                              'name', 'target', 'html', 'para'):
                return ''.join(variables)
            return '?'

        scan_tree(parsed_docstring._tree)
def find_bad_doc_cross_references(names):
    """
    Scan the given modules for docstring cross references that fail to
    resolve.

    :type names:
      Sequence of module names or filenames.
    :return:
      Sequence of ``(module, linenos, container_name, identifier)`` tuples.
    """
    return XrefScanner(names).scan()