382 lines
14 KiB
Python
382 lines
14 KiB
Python
# pyflyby/_docxref.py.
|
|
|
|
# Module for checking Epydoc cross-references.
|
|
|
|
# Portions of the code below are derived from Epydoc, which is distributed
|
|
# under the MIT license:
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
# copy of this software and any associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to permit
|
|
# persons to whom the Software is furnished to do so, subject to the
|
|
# following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# The software is provided "as is", without warranty of any kind, express or
|
|
# implied, including but not limited to the warranties of merchantability,
|
|
# fitness for a particular purpose and noninfringement. In no event shall
|
|
# the authors or copyright holders be liable for any claim, damages or other
|
|
# liability, whether in an action of contract, tort or otherwise, arising
|
|
# from, out of or in connection with the software or the use or other
|
|
# dealings in the software.
|
|
|
|
from __future__ import (absolute_import, division, print_function,
|
|
with_statement)
|
|
|
|
import re
|
|
import six
|
|
from six.moves import builtins
|
|
from textwrap import dedent
|
|
|
|
from epydoc.apidoc import (ClassDoc, ModuleDoc, PropertyDoc,
|
|
RoutineDoc, UNKNOWN, VariableDoc)
|
|
from epydoc.docbuilder import build_doc_index
|
|
from epydoc.markup.plaintext import ParsedPlaintextDocstring
|
|
|
|
from pyflyby._file import Filename
|
|
from pyflyby._idents import DottedIdentifier
|
|
from pyflyby._log import logger
|
|
from pyflyby._modules import ModuleHandle
|
|
from pyflyby._util import cached_attribute, memoize, prefixes
|
|
|
|
# Cross-references into the modules named here are accepted without being
# checked: following a reference into numpy is too slow to be worth it.
ASSUME_MODULES_OK = {'numpy'}
|
|
|
|
@memoize
def map_strings_to_line_numbers(module):
    """
    Index the string literals of ``module`` by their normalized text.

    Every literal yielded by ``module.block.string_literals()`` is keyed by
    its dedented, stripped text, because epydoc dedents strings and lookups
    are done with those.  The untouched text is stored next to the starting
    line number so that callers can count exact lines inside the literal.
    When several literals normalize to the same text, the last one wins.

    :rtype:
      ``dict`` from ``str`` to (``int``, ``str``)
    :return:
      Map from normalized literal text to ``(start_lineno, original_text)``,
      where ``start_lineno`` is 1-indexed.
    """
    return {
        dedent(literal.s).strip(): (literal.startpos.lineno, literal.s)
        for literal in module.block.string_literals()
    }
|
|
|
|
|
|
def get_string_linenos(module, searchstring, within_string):
    """
    Return the line numbers (1-indexed) of ``module``'s source that match
    ``searchstring``.

    ``searchstring`` is compiled as a regular expression and applied line by
    line.  Only string literals are considered (i.e. not comments).  If
    ``within_string`` (modulo indenting) is itself one of the module's string
    literals, only that literal is searched; otherwise every string literal
    in the module is searched.

    [If there's a comment on the same line as a string that also contains the
    searchstring, we'll get confused.]

    :param module:
      Anything accepted by ``ModuleHandle``.
    :param searchstring:
      Regular expression source to look for.
    :param within_string:
      The larger string (e.g. a docstring) expected to contain the match.
    :rtype:
      ``tuple`` of ``int``
    :raise Exception:
      If ``within_string`` was found but contains no match, or if no string
      literal in the module matches at all.
    """
    module = ModuleHandle(module)
    pattern = re.compile(searchstring)
    literals = map_strings_to_line_numbers(module)

    def matching_linenos(first_lineno, text):
        # File line numbers of the lines of ``text`` that match the pattern,
        # given that ``text`` starts on line ``first_lineno``.
        return [first_lineno + offset
                for offset, line in enumerate(text.splitlines())
                if pattern.search(line)]

    enclosing = literals.get(within_string.strip())
    if enclosing is not None:
        # We found the larger string exactly within the ast.
        found = matching_linenos(*enclosing)
        if not found:
            # We could continue down if this ever happened.
            raise Exception(
                "Found superstring in %r but not substring %r within superstring"
                % (module.filename, searchstring))
        return tuple(found)

    # Try a full text search.
    found = []
    for first_lineno, text in literals.values():
        found.extend(matching_linenos(first_lineno, text))
    if not found:
        raise Exception(
            "Could not find %r anywhere in %r" % (searchstring, module.filename))
    return tuple(sorted(found))
|
|
|
|
|
|
def describe_xref(identifier, container):
    """
    Describe where a cross-reference to ``identifier`` occurs.

    The module defining ``container`` is searched for string-literal lines
    in which ``identifier`` directly follows ``L{`` or ``<``, looking first
    inside ``container.docstring``.

    :param identifier:
      The cross-reference target as written in the docstring.
    :param container:
      The epydoc API doc whose docstring contains the cross-reference.
    :return:
      ``(module, linenos, container_name, identifier)`` where ``module`` is
      a ``ModuleHandle``, ``linenos`` is a tuple of 1-indexed line numbers
      and ``container_name`` is the container's canonical name as ``str``.
    """
    module = ModuleHandle(str(container.defining_module.canonical_name))
    assert module.filename == Filename(container.defining_module.filename)
    linenos = get_string_linenos(
        module,
        # Escape the identifier: it is literal text, and an unescaped "."
        # (present in every dotted name) would match any character.
        "(L{|<)%s" % (re.escape(identifier),),
        container.docstring)
    return (module, linenos, str(container.canonical_name), identifier)
|
|
|
|
|
|
|
|
def safe_build_doc_index(modules):
    """
    Build a new epydoc DocIndex for ``modules``, raising on failure.

    :param modules:
      The modules to index; passed straight to ``build_doc_index``.
    :return:
      The value returned by ``build_doc_index``.
    :raise Exception:
      If ``build_doc_index`` returns ``None``.
    """
    from epydoc.docintrospecter import clear_cache
    from epydoc.docparser import _moduledoc_cache

    # build_doc_index isn't re-entrant due to crappy caching! >:(
    # Reset both epydoc caches before every build.
    clear_cache()
    _moduledoc_cache.clear()

    # Build a new DocIndex.  It swallows exceptions and returns None on
    # error! >:(  Turn that into a real exception.
    docindex = build_doc_index(modules)
    if docindex is not None:
        return docindex
    raise Exception("Failed to build doc index on %r" % (modules,))
|
|
|
|
|
|
class ExpandedDocIndex(object):
    """
    A DocIndex wrapper that can be extended with further modules on demand.

    The wrapped DocIndex is built from ``self.modules`` (see ``docindex``)
    and is dropped whenever ``add_module`` changes that set.
    """
    # TODO: this is kludgy and inefficient since it re-reads modules.

    def __init__(self, modules):
        # Normalize everything to ModuleHandle so membership tests and
        # sorting operate on a single type.
        self.modules = {ModuleHandle(entry) for entry in modules}

    def add_module(self, module):
        """
        Add ``module`` to the set of indexed modules and drop the current
        DocIndex so that it gets recreated with the updated set.

        Nothing happens if ``module`` or one of its ancestors is already
        present.  Modules already present that ``module`` supersedes are
        removed from the set.

        :return:
          Whether anything was added.
        """
        module = ModuleHandle(module)
        if any(ancestor in self.modules for ancestor in module.ancestors):
            # The module, or a prefix of it, was already added.
            return False

        # Iterate over a sorted copy so the set can be modified in the loop.
        for known in sorted(self.modules):
            if not known.startswith(module):
                continue
            # This supersedes an existing module.
            assert known != module
            self.modules.remove(known)

        logger.debug("Expanding docindex to include %r", module)
        self.modules.add(module)
        del self.docindex
        return True

    def find(self, a, b):
        """
        Delegate to ``find(a, b)`` on the wrapped DocIndex.
        """
        return self.docindex.find(a, b)

    def get_vardoc(self, a):
        """
        Delegate to ``get_vardoc(a)`` on the wrapped DocIndex.
        """
        return self.docindex.get_vardoc(a)

    @cached_attribute
    def docindex(self):
        """
        The DocIndex built from the names of ``self.modules``, in sorted
        order.
        """
        module_names = [str(handle.name) for handle in sorted(self.modules)]
        return safe_build_doc_index(module_names)
|
|
|
|
|
|
def remove_epydoc_sym_suffix(s):
    """
    Strip the "'" marker that Epydoc appends to 'shadowed' names.

    An apostrophe is dropped only where it is directly followed by a "." or
    by the end of the string; apostrophes elsewhere are kept.

      >>> remove_epydoc_sym_suffix("a.b'.c'.d")
      'a.b.c.d'

    """
    # Group 1 captures whatever follows the apostrophe ("." or nothing) so
    # that it can be put back by the replacement.
    shadow_marker = re.compile(r"'([.]|$)")
    return shadow_marker.sub(r'\1', s)
|
|
|
|
class XrefScanner(object):
    """
    Scan the docstrings of a set of modules for cross-references that do not
    resolve.

    ``scan`` walks the classes and modules found in the DocIndex built from
    ``modules``, checks every ``link`` node in their parsed docstrings with
    ``check_xref`` and collects a description of each one that fails.
    """

    def __init__(self, modules):
        self.modules = modules
        self.docindex = safe_build_doc_index(modules)
        # Collected by scan_docstring.  Initialized here as well as in scan()
        # so that the scan_* methods can be called before scan().
        self._failed_xrefs = []

    @cached_attribute
    def expanded_docindex(self):
        """
        An ``ExpandedDocIndex`` over ``self.modules``; ``check_xref`` adds
        further modules to it as needed.
        """
        return ExpandedDocIndex(self.modules)

    def scan(self):
        """
        Scan all classes and modules in ``self.docindex``.

        :return:
          Sorted ``tuple`` of ``describe_xref`` results, one per failed
          cross-reference.
        """
        self._failed_xrefs = []
        valdocs = sorted(self.docindex.reachable_valdocs(
            imports=False, packages=False, bases=False, submodules=False,
            subclasses=False, private=True
        ))
        for doc in valdocs:
            if isinstance(doc, ClassDoc):
                self.scan_class(doc)
            elif isinstance(doc, ModuleDoc):
                self.scan_module(doc)
        return tuple(sorted(self._failed_xrefs))

    def scan_module(self, doc):
        """
        Scan a module's description, its submodules (if it is a package),
        and its functions and other variables.
        """
        self.descr(doc)
        if doc.is_package is True:
            for submodule in doc.submodules:
                self.scan_module(submodule)
            # self.scan_module_list(doc)
        self.scan_details_list(doc, "function")
        self.scan_details_list(doc, "other")

    def scan_class(self, doc):
        """
        Scan a class's description and its methods, class variables,
        instance variables and properties.
        """
        self.descr(doc)
        self.scan_details_list(doc, "method")
        self.scan_details_list(doc, "classvariable")
        self.scan_details_list(doc, "instancevariable")
        self.scan_details_list(doc, "property")

    def scan_details_list(self, doc, value_type):
        """
        Scan every non-imported variable of ``doc`` that has the given
        ``value_type``.  For classes, inherited variables are skipped too.
        """
        detailed = True
        if isinstance(doc, ClassDoc):
            var_docs = doc.select_variables(value_type=value_type,
                                            imported=False, inherited=False,
                                            public=None,
                                            detailed=detailed)
        else:
            var_docs = doc.select_variables(value_type=value_type,
                                            imported=False,
                                            public=None,
                                            detailed=detailed)
        for var_doc in var_docs:
            self.scan_details(var_doc)

    def scan_details(self, var_doc):
        """
        Scan the documentation attached to one variable.

        The description is always scanned.  For routines, the return type,
        return description, argument descriptions and argument types are
        scanned as well; for properties, the return types of the accessor
        functions; for anything else, the type description.
        """
        self.descr(var_doc)
        if isinstance(var_doc.value, RoutineDoc):
            self.return_type(var_doc)
            self.return_descr(var_doc)
            for (arg_names, arg_descr) in var_doc.value.arg_descrs:
                self.scan_docstring(arg_descr, var_doc.value)
            for arg in var_doc.value.arg_types:
                self.scan_docstring(
                    var_doc.value.arg_types[arg], var_doc.value)
        elif isinstance(var_doc.value, PropertyDoc):
            prop_doc = var_doc.value
            self.return_type(prop_doc.fget)
            self.return_type(prop_doc.fset)
            self.return_type(prop_doc.fdel)
        else:
            self.type_descr(var_doc)

    def _scan_attr(self, attr, api_doc):
        """
        Scan the parsed docstring stored in attribute ``attr`` of
        ``api_doc``.

        If ``api_doc`` is a ``VariableDoc`` that lacks the attribute (or has
        it set to ``None``/``UNKNOWN``), fall back to the variable's value.
        """
        if api_doc in (None, UNKNOWN):
            return ''
        pds = getattr(api_doc, attr, None) # pds = ParsedDocstring.
        if pds not in (None, UNKNOWN):
            self.scan_docstring(pds, api_doc)
        elif isinstance(api_doc, VariableDoc):
            self._scan_attr(attr, api_doc.value)

    def summary(self, api_doc):
        """Scan the ``summary`` docstring of ``api_doc``."""
        self._scan_attr('summary', api_doc)

    def descr(self, api_doc):
        """Scan the ``descr`` docstring of ``api_doc``."""
        self._scan_attr('descr', api_doc)

    def type_descr(self, api_doc):
        """Scan the ``type_descr`` docstring of ``api_doc``."""
        self._scan_attr('type_descr', api_doc)

    def return_type(self, api_doc):
        """Scan the ``return_type`` docstring of ``api_doc``."""
        self._scan_attr('return_type', api_doc)

    def return_descr(self, api_doc):
        """Scan the ``return_descr`` docstring of ``api_doc``."""
        self._scan_attr('return_descr', api_doc)

    def check_xref(self, identifier, container):
        """
        Check that ``identifier`` cross-references a proper symbol.

        Look in modules that we weren't explicitly asked to look in, if
        needed.

        :param identifier:
          The cross-reference target as written in the docstring.
        :param container:
          The API doc whose docstring contains the cross-reference.
        :rtype:
          ``bool``
        """
        if identifier in builtins.__dict__:
            return True

        def check_container():
            # Resolve the identifier relative to the container, using the
            # (possibly expanded) doc index.
            if self.expanded_docindex.find(identifier, container) is not None:
                return True
            if isinstance(container, RoutineDoc):
                tcontainer = self.expanded_docindex.get_vardoc(
                    container.canonical_name)
                doc = self.expanded_docindex.find(identifier, tcontainer)
                # Not found yet: retry in the context of each routine this
                # one overrides, stopping as soon as the identifier resolves.
                while (doc is None and tcontainer not in (None, UNKNOWN)
                       and tcontainer.overrides not in (None, UNKNOWN)):
                    tcontainer = tcontainer.overrides
                    doc = self.expanded_docindex.find(identifier, tcontainer)
                return doc is not None
            return False

        def check_defining_module(x):
            # Add the module that defines ``x`` to the expanded index and
            # retry, unless that module is on the assume-OK list.
            if x is None:
                return False
            defining_module_name = remove_epydoc_sym_suffix(str(
                x.defining_module.canonical_name))
            if defining_module_name in ASSUME_MODULES_OK:
                return True
            if self.expanded_docindex.add_module(defining_module_name):
                if check_container():
                    return True
            return False

        if check_container():
            return True
        if (isinstance(container, RoutineDoc) and
                identifier in container.all_args()):
            return True
        if check_defining_module(container):
            return True
        # If the user has imported foo.bar.baz as baz and now uses
        # ``baz.quux``, we need to add the module foo.bar.baz.
        for prefix in reversed(list(prefixes(
                DottedIdentifier(remove_epydoc_sym_suffix(identifier))))):
            if check_defining_module(
                    self.docindex.find(str(prefix), container)):
                return True
        # Last resort: treat the identifier as a fully-qualified name and
        # add the module that contains it.
        try:
            module = ModuleHandle.containing(identifier)
        except ImportError:
            pass
        else:
            if str(module.name) in ASSUME_MODULES_OK:
                return True
            if self.expanded_docindex.add_module(module):
                if check_container():
                    return True
        return False

    def scan_docstring(self, parsed_docstring, container):
        """
        Check every ``link`` node in ``parsed_docstring``.

        Each link whose target fails ``check_xref`` is recorded in
        ``self._failed_xrefs`` via ``describe_xref``.  Missing docstrings and
        plaintext docstrings are skipped.

        :param parsed_docstring:
          A parsed docstring with a ``_tree`` attribute, or
          ``None``/``UNKNOWN``.
        :param container:
          The API doc the docstring belongs to.
        """
        if parsed_docstring in (None, UNKNOWN):
            return ''
        if isinstance(parsed_docstring, ParsedPlaintextDocstring):
            return ''

        def scan_tree(tree):
            # Return the text of ``tree``, checking link nodes on the way.
            if isinstance(tree, six.string_types):
                return tree
            variables = [scan_tree(child) for child in tree.children]
            if tree.tag == 'link':
                # The second child of a link node is used as its target.
                identifier = variables[1]
                if not self.check_xref(identifier, container):
                    self._failed_xrefs.append(
                        describe_xref(identifier, container))
                return '?'
            elif tree.tag == 'indexed':
                return '?'
            elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                              'name', 'target', 'html', 'para'):
                return ''.join(variables)
            return '?'

        scan_tree(parsed_docstring._tree)
|
|
|
|
|
|
def find_bad_doc_cross_references(names):
    """
    Report the docstring cross references in ``names`` that do not resolve.

    :type names:
      Sequence of module names or filenames.
    :return:
      Sequence of ``(module, linenos, container_name, identifier)`` tuples,
      one per failed cross reference.
    """
    return XrefScanner(names).scan()
|