init

2022-03-31 20:20:07 -05:00 · 2022-03-31 20:20:07 -05:00 · 38355d2442
commit 38355d2442
9083 changed files with 1225834 additions and 0 deletions
--- a/.venv/lib/python3.8/site-packages/pyflyby/_docxref.py
+++ b/.venv/lib/python3.8/site-packages/pyflyby/_docxref.py
@ -0,0 +1,382 @@
+# pyflyby/_docxref.py.
+
+# Module for checking Epydoc cross-references.
+
+# Portions of the code below are derived from Epydoc, which is distributed
+# under the MIT license:
+#
+#   Permission is hereby granted, free of charge, to any person obtaining a
+#   copy of this software and any associated documentation files (the
+#   "Software"), to deal in the Software without restriction, including
+#   without limitation the rights to use, copy, modify, merge, publish,
+#   distribute, sublicense, and/or sell copies of the Software, and to permit
+#   persons to whom the Software is furnished to do so, subject to the
+#   following conditions:
+#
+#   The above copyright notice and this permission notice shall be included in
+#   all copies or substantial portions of the Software.
+#
+#   The software is provided "as is", without warranty of any kind, express or
+#   implied, including but not limited to the warranties of merchantability,
+#   fitness for a particular purpose and noninfringement. In no event shall
+#   the authors or copyright holders be liable for any claim, damages or other
+#   liability, whether in an action of contract, tort or otherwise, arising
+#   from, out of or in connection with the software or the use or other
+#   dealings in the software.
+
+from __future__ import (absolute_import, division, print_function,
+                        with_statement)
+
+import re
+import six
+from   six.moves                import builtins
+from   textwrap                 import dedent
+
+from   epydoc.apidoc            import (ClassDoc, ModuleDoc, PropertyDoc,
+                                        RoutineDoc, UNKNOWN, VariableDoc)
+from   epydoc.docbuilder        import build_doc_index
+from   epydoc.markup.plaintext  import ParsedPlaintextDocstring
+
+from   pyflyby._file            import Filename
+from   pyflyby._idents          import DottedIdentifier
+from   pyflyby._log             import logger
+from   pyflyby._modules         import ModuleHandle
+from   pyflyby._util            import cached_attribute, memoize, prefixes
+
+# If someone references numpy.*, just assume it's OK - it's not worth
+# following into numpy because it's too slow.
+ASSUME_MODULES_OK = set(['numpy'])
+
+@memoize
+def map_strings_to_line_numbers(module):
+    """
+    Walk ``module.ast``, looking at all string literals.  Return a map from
+    string literals to line numbers (1-index).
+
+    :rtype:
+      ``dict`` from ``str`` to (``int``, ``str``)
+    """
+    d = {}
+    for field in module.block.string_literals():
+        # Dedent because epydoc dedents strings and we need to look up by
+        # those.  But keep track of original version because we need to count
+        # exact line numbers.
+        s = dedent(field.s).strip()
+        start_lineno = field.startpos.lineno
+        d[s] = (start_lineno, field.s)
+    return d
+
+
+def get_string_linenos(module, searchstring, within_string):
+    """
+    Return the line numbers (1-indexed) within ``filename`` that contain
+    ``searchstring``.  Only consider string literals (i.e. not comments).
+    First look for exact matches of ``within_string`` (modulo indenting) and
+    then search within that.  Only if the ``within_string`` is not found,
+    search the entire file.
+
+    [If there's a comment on the same line as a string that also contains the
+    searchstring, we'll get confused.]
+    """
+    module = ModuleHandle(module)
+    regexp = re.compile(searchstring)
+    map = map_strings_to_line_numbers(module)
+    results = []
+    def scan_within_string(results, start_lineno, orig_full_string):
+        for i, line in enumerate(orig_full_string.splitlines()):
+            if regexp.search(line):
+                results.append( start_lineno + i )
+    try:
+        lineno, orig_full_string = map[within_string.strip()]
+    except KeyError:
+        pass
+    else:
+        # We found the larger string exactly within the ast.
+        scan_within_string(results, lineno, orig_full_string)
+        if results:
+            return tuple(results)
+        # We could continue down if this ever happened.
+        raise Exception(
+            "Found superstring in %r but not substring %r within superstring"
+            % (module.filename, searchstring))
+    # Try a full text search.
+    for lineno, orig_full_string in map.values():
+        scan_within_string(results, lineno, orig_full_string)
+    if results:
+        return tuple(sorted(results))
+    raise Exception(
+        "Could not find %r anywhere in %r" % (searchstring, module.filename))
+
+
+def describe_xref(identifier, container):
+    module = ModuleHandle(str(container.defining_module.canonical_name))
+    assert module.filename == Filename(container.defining_module.filename)
+    linenos = get_string_linenos(
+        module,
+        "(L{|<)%s" % (identifier,),
+        container.docstring)
+    return (module, linenos, str(container.canonical_name), identifier)
+
+
+
+def safe_build_doc_index(modules):
+    # build_doc_index isn't re-entrant due to crappy caching! >:(
+    from epydoc.docintrospecter import clear_cache
+    clear_cache()
+    from epydoc.docparser import _moduledoc_cache
+    _moduledoc_cache.clear()
+    # Build a new DocIndex.  It swallows exceptions and returns None on error!
+    # >:(
+    result = build_doc_index(modules)
+    if result is None:
+        raise Exception("Failed to build doc index on %r" % (modules,))
+    return result
+
+
+class ExpandedDocIndex(object):
+    """
+    A wrapper around DocIndex that automatically expands with more modules as
+    needed.
+    """
+    # TODO: this is kludgy and inefficient since it re-reads modules.
+    def __init__(self, modules):
+        self.modules = set([ModuleHandle(m) for m in modules])
+
+    def add_module(self, module):
+        """
+        Adds ``module`` and recreates the DocIndex with the updated set of
+        modules.
+
+        :return:
+          Whether anything was added.
+        """
+        module = ModuleHandle(module)
+        for prefix in module.ancestors:
+            if prefix in self.modules:
+                # The module, or a prefix of it, was already added.
+                return False
+
+        for existing_module in sorted(self.modules):
+            if existing_module.startswith(module):
+                # This supersedes an existing module.
+                assert existing_module != module
+                self.modules.remove(existing_module)
+
+        logger.debug("Expanding docindex to include %r", module)
+        self.modules.add(module)
+        del self.docindex
+        return True
+
+    def find(self, a, b):
+        return self.docindex.find(a, b)
+
+    def get_vardoc(self, a):
+        return self.docindex.get_vardoc(a)
+
+    @cached_attribute
+    def docindex(self):
+        return safe_build_doc_index(
+            [str(m.name) for m in sorted(self.modules)])
+
+
+def remove_epydoc_sym_suffix(s):
+    """
+    Remove trailing "'" that Epydoc annoyingly adds to 'shadowed' names.
+
+      >>> remove_epydoc_sym_suffix("a.b'.c'.d")
+      'a.b.c.d'
+
+    """
+    return re.sub(r"'([.]|$)", r'\1', s)
+
+class XrefScanner(object):
+
+    def __init__(self, modules):
+        self.modules = modules
+        self.docindex = safe_build_doc_index(modules)
+
+    @cached_attribute
+    def expanded_docindex(self):
+        return ExpandedDocIndex(self.modules)
+
+    def scan(self):
+        self._failed_xrefs = []
+        valdocs = sorted(self.docindex.reachable_valdocs(
+            imports=False, packages=False, bases=False, submodules=False,
+            subclasses=False, private=True
+            ))
+        for doc in valdocs:
+            if isinstance(doc, ClassDoc):
+                self.scan_class(doc)
+            elif isinstance(doc, ModuleDoc):
+                self.scan_module(doc)
+        return tuple(sorted(self._failed_xrefs))
+
+    def scan_module(self, doc):
+        self.descr(doc)
+        if doc.is_package is True:
+            for submodule in doc.submodules:
+                self.scan_module(submodule)
+            # self.scan_module_list(doc)
+        self.scan_details_list(doc, "function")
+        self.scan_details_list(doc, "other")
+
+    def scan_class(self, doc):
+        self.descr(doc)
+        self.scan_details_list(doc, "method")
+        self.scan_details_list(doc, "classvariable")
+        self.scan_details_list(doc, "instancevariable")
+        self.scan_details_list(doc, "property")
+
+    def scan_details_list(self, doc, value_type):
+        detailed = True
+        if isinstance(doc, ClassDoc):
+            var_docs = doc.select_variables(value_type=value_type,
+                                            imported=False, inherited=False,
+                                            public=None,
+                                            detailed=detailed)
+        else:
+            var_docs = doc.select_variables(value_type=value_type,
+                                            imported=False,
+                                            public=None,
+                                            detailed=detailed)
+        for var_doc in var_docs:
+            self.scan_details(var_doc)
+
+    def scan_details(self, var_doc):
+        self.descr(var_doc)
+        if isinstance(var_doc.value, RoutineDoc):
+            self.return_type(var_doc)
+            self.return_descr(var_doc)
+            for (arg_names, arg_descr) in var_doc.value.arg_descrs:
+                self.scan_docstring(arg_descr, var_doc.value)
+            for arg in var_doc.value.arg_types:
+                self.scan_docstring(
+                    var_doc.value.arg_types[arg], var_doc.value)
+        elif isinstance(var_doc.value, PropertyDoc):
+            prop_doc = var_doc.value
+            self.return_type(prop_doc.fget)
+            self.return_type(prop_doc.fset)
+            self.return_type(prop_doc.fdel)
+        else:
+            self.type_descr(var_doc)
+
+    def _scan_attr(self, attr, api_doc):
+        if api_doc in (None, UNKNOWN):
+            return ''
+        pds = getattr(api_doc, attr, None) # pds = ParsedDocstring.
+        if pds not in (None, UNKNOWN):
+            self.scan_docstring(pds, api_doc)
+        elif isinstance(api_doc, VariableDoc):
+            self._scan_attr(attr, api_doc.value)
+
+    def summary(self, api_doc):
+        self._scan_attr('summary', api_doc)
+
+    def descr(self, api_doc):
+        self._scan_attr('descr', api_doc)
+
+    def type_descr(self, api_doc):
+        self._scan_attr('type_descr', api_doc)
+
+    def return_type(self, api_doc):
+        self._scan_attr('return_type', api_doc)
+
+    def return_descr(self, api_doc):
+        self._scan_attr('return_descr', api_doc)
+
+    def check_xref(self, identifier, container):
+        """
+        Check that ``identifier`` cross-references a proper symbol.
+
+        Look in modules that we weren't explicitly asked to look in, if
+        needed.
+        """
+        if identifier in builtins.__dict__:
+            return True
+        def check_container():
+            if self.expanded_docindex.find(identifier, container) is not None:
+                return True
+            if isinstance(container, RoutineDoc):
+                tcontainer = self.expanded_docindex.get_vardoc(
+                    container.canonical_name)
+                doc = self.expanded_docindex.find(identifier, tcontainer)
+                while (doc is not None and tcontainer not in (None, UNKNOWN)
+                       and tcontainer.overrides not in (None, UNKNOWN)):
+                    tcontainer = tcontainer.overrides
+                    doc = self.expanded_docindex.find(identifier, tcontainer)
+                return doc is not None
+            return False
+        def check_defining_module(x):
+            if x is None:
+                return False
+            defining_module_name = remove_epydoc_sym_suffix(str(
+                x.defining_module.canonical_name))
+            if defining_module_name in ASSUME_MODULES_OK:
+                return True
+            if self.expanded_docindex.add_module(defining_module_name):
+                if check_container():
+                    return True
+            return False
+        if check_container():
+            return True
+        if (isinstance(container, RoutineDoc) and
+            identifier in container.all_args()):
+            return True
+        if check_defining_module(container):
+            return True
+        # If the user has imported foo.bar.baz as baz and now uses
+        # ``baz.quux``, we need to add the module foo.bar.baz.
+        for prefix in reversed(list(prefixes(
+                    DottedIdentifier(remove_epydoc_sym_suffix(identifier))))):
+            if check_defining_module(
+                self.docindex.find(str(prefix), container)):
+                return True
+        try:
+            module = ModuleHandle.containing(identifier)
+        except ImportError:
+            pass
+        else:
+            if str(module.name) in ASSUME_MODULES_OK:
+                return True
+            if self.expanded_docindex.add_module(module):
+                if check_container():
+                    return True
+        return False
+
+    def scan_docstring(self, parsed_docstring, container):
+        if parsed_docstring in (None, UNKNOWN): return ''
+        if isinstance(parsed_docstring, ParsedPlaintextDocstring):
+            return ''
+
+        def scan_tree(tree):
+            if isinstance(tree, six.string_types):
+                return tree
+            variables = [scan_tree(child) for child in tree.children]
+            if tree.tag == 'link':
+                identifier = variables[1]
+                if not self.check_xref(identifier, container):
+                    self._failed_xrefs.append(
+                        describe_xref(identifier, container) )
+                return '?'
+            elif tree.tag == 'indexed':
+                return '?'
+            elif tree.tag in ('epytext', 'section', 'tag', 'arg',
+                              'name', 'target', 'html', 'para'):
+                return ''.join(variables)
+            return '?'
+
+        scan_tree(parsed_docstring._tree)
+
+
+def find_bad_doc_cross_references(names):
+    """
+    Find docstring cross references that fail to resolve.
+
+    :type names:
+      Sequence of module names or filenames.
+    :return:
+      Sequence of ``(module, linenos, container_name, identifier)`` tuples.
+    """
+    xrs = XrefScanner(names)
+    return xrs.scan()