# pyflyby/_docxref.py.

# Module for checking Epydoc cross-references.

# Portions of the code below are derived from Epydoc, which is distributed
# under the MIT license:
#
#   Permission is hereby granted, free of charge, to any person obtaining a
#   copy of this software and any associated documentation files (the
#   "Software"), to deal in the Software without restriction, including
#   without limitation the rights to use, copy, modify, merge, publish,
#   distribute, sublicense, and/or sell copies of the Software, and to permit
#   persons to whom the Software is furnished to do so, subject to the
#   following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   The software is provided "as is", without warranty of any kind, express or
#   implied, including but not limited to the warranties of merchantability,
#   fitness for a particular purpose and noninfringement. In no event shall
#   the authors or copyright holders be liable for any claim, damages or other
#   liability, whether in an action of contract, tort or otherwise, arising
#   from, out of or in connection with the software or the use or other
#   dealings in the software.

from __future__ import (absolute_import, division, print_function,
                        with_statement)

import re
import six
from   six.moves                import builtins
from   textwrap                 import dedent

from   epydoc.apidoc            import (ClassDoc, ModuleDoc, PropertyDoc,
                                        RoutineDoc, UNKNOWN, VariableDoc)
from   epydoc.docbuilder        import build_doc_index
from   epydoc.markup.plaintext  import ParsedPlaintextDocstring

from   pyflyby._file            import Filename
from   pyflyby._idents          import DottedIdentifier
from   pyflyby._log             import logger
from   pyflyby._modules         import ModuleHandle
from   pyflyby._util            import cached_attribute, memoize, prefixes

# Modules whose contents are assumed to be valid cross-reference targets.
# References to numpy.* are accepted without following them into numpy,
# because doing so is too slow to be worth it.
ASSUME_MODULES_OK = {'numpy'}

@memoize
def map_strings_to_line_numbers(module):
    """
    Index the string literals of ``module`` by their dedented text.

    Iterates over ``module.block.string_literals()``.  Each literal is keyed
    by its dedented and stripped text, because epydoc dedents strings and
    lookups are done using those.  The original, unmodified text is kept in
    the value because it is needed to count exact line numbers.  If several
    literals share the same key, the last one wins.

    :rtype:
      ``dict`` from ``str`` to (``int``, ``str``): the line number (1-index)
      on which the literal starts, and the literal's original text.
    """
    return {
        dedent(literal.s).strip(): (literal.startpos.lineno, literal.s)
        for literal in module.block.string_literals()
    }


def get_string_linenos(module, searchstring, within_string):
    """
    Find the line numbers (1-indexed) of the lines in ``module`` that match
    ``searchstring``.

    Only string literals are considered (i.e. not comments).  If
    ``within_string`` (modulo indenting) is exactly one of the module's
    string literals, only that literal is searched.  Otherwise, every string
    literal in the module is searched.

    [If there's a comment on the same line as a string that also contains the
    searchstring, we'll get confused.]

    :param module:
      Anything accepted by ``ModuleHandle``.
    :param searchstring:
      Pattern to look for; it is compiled as a regular expression.
    :param within_string:
      The larger string (e.g. a docstring) expected to contain the match.
    :rtype:
      ``tuple`` of ``int``
    :raise Exception:
      If ``within_string`` was found but contains no match, or if nothing
      matches anywhere in the module.
    """
    module = ModuleHandle(module)
    pattern = re.compile(searchstring)
    literals = map_strings_to_line_numbers(module)

    def matching_linenos(first_lineno, text):
        # Line numbers of the lines of ``text`` that match the pattern,
        # given that the first line of ``text`` is on ``first_lineno``.
        return [first_lineno + offset
                for offset, line in enumerate(text.splitlines())
                if pattern.search(line)]

    key = within_string.strip()
    if key in literals:
        # We found the larger string exactly within the ast.
        first_lineno, original_text = literals[key]
        found = matching_linenos(first_lineno, original_text)
        if found:
            return tuple(found)
        # We could fall through to the full search if this ever happened.
        raise Exception(
            "Found superstring in %r but not substring %r within superstring"
            % (module.filename, searchstring))

    # The larger string is unknown: try a full text search.
    found = []
    for first_lineno, original_text in literals.values():
        found.extend(matching_linenos(first_lineno, original_text))
    if found:
        return tuple(sorted(found))
    raise Exception(
        "Could not find %r anywhere in %r" % (searchstring, module.filename))


def describe_xref(identifier, container):
    """
    Describe where the cross-reference to ``identifier`` is located.

    The module defining ``container`` is searched for string-literal lines
    containing ``L{`` or ``<`` immediately followed by ``identifier``,
    preferring matches inside ``container``'s docstring.

    :param identifier:
      Target text of the cross-reference.
    :param container:
      API doc object whose docstring contains the cross-reference.
    :return:
      ``(module, linenos, container_name, identifier)`` tuple.
    """
    module = ModuleHandle(str(container.defining_module.canonical_name))
    assert module.filename == Filename(container.defining_module.filename)
    # The identifier is literal text rather than a pattern, so escape it:
    # otherwise characters such as "." or "+" would be interpreted as regex
    # syntax, which could match the wrong text, fail to match the reference
    # itself, or make the pattern fail to compile.
    escaped_identifier = re.escape("%s" % (identifier,))
    linenos = get_string_linenos(
        module,
        "(L{|<)%s" % (escaped_identifier,),
        container.docstring)
    return (module, linenos, str(container.canonical_name), identifier)


def safe_build_doc_index(modules):
    """
    Build a new epydoc DocIndex for ``modules`` after clearing epydoc's
    caches.

    :param modules:
      Passed through to ``build_doc_index``.
    :return:
      The result of ``build_doc_index``.
    :raise Exception:
      If ``build_doc_index`` returned ``None``.
    """
    from epydoc.docintrospecter import clear_cache
    from epydoc.docparser import _moduledoc_cache
    # build_doc_index isn't re-entrant due to its caching, so both caches are
    # emptied before each build.
    clear_cache()
    _moduledoc_cache.clear()
    docindex = build_doc_index(modules)
    if docindex is not None:
        return docindex
    # build_doc_index swallows exceptions and returns None on error; turn
    # that into an explicit failure.
    raise Exception("Failed to build doc index on %r" % (modules,))


class ExpandedDocIndex(object):
    """
    Wrapper around a DocIndex that can grow: modules can be added after
    construction, and the underlying DocIndex is then recreated from the
    updated set of modules.
    """
    # TODO: this is kludgy and inefficient since it re-reads modules.
    def __init__(self, modules):
        """
        :param modules:
          Iterable of modules; each one is converted with ``ModuleHandle``.
        """
        self.modules = {ModuleHandle(m) for m in modules}

    def add_module(self, module):
        """
        Add ``module`` to the set of indexed modules and discard the current
        DocIndex so that it is recreated with the updated set of modules.

        Nothing is done if the module, or a prefix of it, was already added.
        Existing modules superseded by ``module`` are dropped from the set.

        :return:
          Whether anything was added.
        """
        module = ModuleHandle(module)
        # The module, or a prefix of it, may already have been added.
        if any(ancestor in self.modules for ancestor in module.ancestors):
            return False

        # Iterate over a sorted copy, since the set is modified in the loop.
        for candidate in sorted(self.modules):
            if not candidate.startswith(module):
                continue
            # The new module supersedes this existing one.
            assert candidate != module
            self.modules.remove(candidate)

        logger.debug("Expanding docindex to include %r", module)
        self.modules.add(module)
        # Drop the current docindex; it gets rebuilt from the new module set.
        del self.docindex
        return True

    def find(self, a, b):
        """
        Delegate to ``find`` of the current DocIndex.
        """
        return self.docindex.find(a, b)

    def get_vardoc(self, a):
        """
        Delegate to ``get_vardoc`` of the current DocIndex.
        """
        return self.docindex.get_vardoc(a)

    @cached_attribute
    def docindex(self):
        """
        DocIndex built from the names of the current modules, in sorted order.
        """
        module_names = [str(m.name) for m in sorted(self.modules)]
        return safe_build_doc_index(module_names)


def remove_epydoc_sym_suffix(s):
    """
    Strip the "'" markers that Epydoc appends to 'shadowed' names.

    A quote is removed only where it is directly followed by a "." or where
    it is at the end of the string (as matched by ``$``); quotes elsewhere
    are left alone.

      >>> remove_epydoc_sym_suffix("a.b'.c'.d")
      'a.b.c.d'

    """
    shadow_marker = re.compile(r"'([.]|$)")
    return shadow_marker.sub(r'\1', s)

class XrefScanner(object):
    """
    Scans the docstrings of a set of modules and collects the
    cross-references in them that fail to resolve.
    """

    def __init__(self, modules):
        """
        :param modules:
          Sequence of module names or filenames to scan.
        """
        self.modules = modules
        self.docindex = safe_build_doc_index(modules)

    @cached_attribute
    def expanded_docindex(self):
        """
        An `ExpandedDocIndex` that starts with the requested modules and may
        be expanded with more modules while resolving cross-references.
        """
        return ExpandedDocIndex(self.modules)

    def scan(self):
        """
        Scan every class and module reachable from the doc index.

        :return:
          Sorted tuple with one ``describe_xref`` result per cross-reference
          that failed to resolve.
        """
        self._failed_xrefs = []
        valdocs = sorted(self.docindex.reachable_valdocs(
            imports=False, packages=False, bases=False, submodules=False,
            subclasses=False, private=True
            ))
        for doc in valdocs:
            if isinstance(doc, ClassDoc):
                self.scan_class(doc)
            elif isinstance(doc, ModuleDoc):
                self.scan_module(doc)
        return tuple(sorted(self._failed_xrefs))

    def scan_module(self, doc):
        """
        Scan the description of module ``doc``, its submodules (if it is a
        package), and the details of its functions and other variables.
        """
        self.descr(doc)
        if doc.is_package is True:
            for submodule in doc.submodules:
                self.scan_module(submodule)
        self.scan_details_list(doc, "function")
        self.scan_details_list(doc, "other")

    def scan_class(self, doc):
        """
        Scan the description of class ``doc`` and the details of its methods,
        class variables, instance variables and properties.
        """
        self.descr(doc)
        self.scan_details_list(doc, "method")
        self.scan_details_list(doc, "classvariable")
        self.scan_details_list(doc, "instancevariable")
        self.scan_details_list(doc, "property")

    def scan_details_list(self, doc, value_type):
        """
        Scan the details of each variable of ``doc`` that has the given
        ``value_type``.

        Imported variables are excluded; for classes, inherited variables are
        excluded as well.
        """
        detailed = True
        if isinstance(doc, ClassDoc):
            var_docs = doc.select_variables(value_type=value_type,
                                            imported=False, inherited=False,
                                            public=None,
                                            detailed=detailed)
        else:
            var_docs = doc.select_variables(value_type=value_type,
                                            imported=False,
                                            public=None,
                                            detailed=detailed)
        for var_doc in var_docs:
            self.scan_details(var_doc)

    def scan_details(self, var_doc):
        """
        Scan the docstring fields of a single variable.

        The description is always scanned.  For routines, the return type,
        return description, argument descriptions and argument types are
        scanned too; for properties, the return types of the accessors; for
        anything else, the type description.
        """
        self.descr(var_doc)
        if isinstance(var_doc.value, RoutineDoc):
            self.return_type(var_doc)
            self.return_descr(var_doc)
            for (_arg_names, arg_descr) in var_doc.value.arg_descrs:
                self.scan_docstring(arg_descr, var_doc.value)
            for arg in var_doc.value.arg_types:
                self.scan_docstring(
                    var_doc.value.arg_types[arg], var_doc.value)
        elif isinstance(var_doc.value, PropertyDoc):
            prop_doc = var_doc.value
            self.return_type(prop_doc.fget)
            self.return_type(prop_doc.fset)
            self.return_type(prop_doc.fdel)
        else:
            self.type_descr(var_doc)

    def _scan_attr(self, attr, api_doc):
        """
        Scan the parsed docstring stored in attribute ``attr`` of ``api_doc``.

        If ``api_doc`` has no usable value for ``attr`` and is a
        ``VariableDoc``, the attribute is looked up on its value instead.
        The return value is not meaningful.
        """
        if api_doc in (None, UNKNOWN):
            return ''
        pds = getattr(api_doc, attr, None) # pds = ParsedDocstring.
        if pds not in (None, UNKNOWN):
            self.scan_docstring(pds, api_doc)
        elif isinstance(api_doc, VariableDoc):
            self._scan_attr(attr, api_doc.value)

    def summary(self, api_doc):
        """
        Scan the ``summary`` of ``api_doc``.
        """
        self._scan_attr('summary', api_doc)

    def descr(self, api_doc):
        """
        Scan the ``descr`` of ``api_doc``.
        """
        self._scan_attr('descr', api_doc)

    def type_descr(self, api_doc):
        """
        Scan the ``type_descr`` of ``api_doc``.
        """
        self._scan_attr('type_descr', api_doc)

    def return_type(self, api_doc):
        """
        Scan the ``return_type`` of ``api_doc``.
        """
        self._scan_attr('return_type', api_doc)

    def return_descr(self, api_doc):
        """
        Scan the ``return_descr`` of ``api_doc``.
        """
        self._scan_attr('return_descr', api_doc)

    def check_xref(self, identifier, container):
        """
        Check that ``identifier`` cross-references a proper symbol.

        Look in modules that we weren't explicitly asked to look in, if
        needed.

        :param identifier:
          Target of the cross-reference.
        :param container:
          API doc object whose docstring contains the cross-reference.
        :rtype:
          ``bool``
        :return:
          Whether the cross-reference could be resolved.
        """
        if identifier in builtins.__dict__:
            return True
        def check_container():
            # Resolve the identifier in the context of the container, using
            # the current state of the expanded docindex.
            if self.expanded_docindex.find(identifier, container) is not None:
                return True
            if isinstance(container, RoutineDoc):
                # Not found directly: try the contexts of the routines that
                # this one overrides, walking up the chain until the
                # identifier resolves or there is nothing left to check.
                tcontainer = self.expanded_docindex.get_vardoc(
                    container.canonical_name)
                doc = self.expanded_docindex.find(identifier, tcontainer)
                while (doc is None and tcontainer not in (None, UNKNOWN)
                       and tcontainer.overrides not in (None, UNKNOWN)):
                    tcontainer = tcontainer.overrides
                    doc = self.expanded_docindex.find(identifier, tcontainer)
                return doc is not None
            return False
        def check_defining_module(x):
            # Retry the lookup after adding the module that defines ``x`` to
            # the expanded docindex.
            if x is None:
                return False
            defining_module_name = remove_epydoc_sym_suffix(str(
                x.defining_module.canonical_name))
            if defining_module_name in ASSUME_MODULES_OK:
                return True
            if self.expanded_docindex.add_module(defining_module_name):
                if check_container():
                    return True
            return False
        if check_container():
            return True
        # Names of the routine's own arguments are acceptable targets.
        if (isinstance(container, RoutineDoc) and
            identifier in container.all_args()):
            return True
        if check_defining_module(container):
            return True
        # If the user has imported foo.bar.baz as baz and now uses
        # ``baz.quux``, we need to add the module foo.bar.baz.
        for prefix in reversed(list(prefixes(
                    DottedIdentifier(remove_epydoc_sym_suffix(identifier))))):
            if check_defining_module(
                self.docindex.find(str(prefix), container)):
                return True
        # Last resort: treat the identifier as a fully-qualified name and add
        # the module that contains it.
        try:
            module = ModuleHandle.containing(identifier)
        except ImportError:
            pass
        else:
            if str(module.name) in ASSUME_MODULES_OK:
                return True
            if self.expanded_docindex.add_module(module):
                if check_container():
                    return True
        return False

    def scan_docstring(self, parsed_docstring, container):
        """
        Check every cross-reference ('link' element) in ``parsed_docstring``.

        Each cross-reference that fails `check_xref` is recorded in
        ``self._failed_xrefs``.  Missing and plaintext docstrings are
        skipped.  The return value is not meaningful.

        :param parsed_docstring:
          Parsed docstring to scan, or ``None``/``UNKNOWN``.
        :param container:
          API doc object that the docstring belongs to.
        """
        if parsed_docstring in (None, UNKNOWN):
            return ''
        if isinstance(parsed_docstring, ParsedPlaintextDocstring):
            return ''

        def scan_tree(tree):
            # Returns the text of the subtree, with '?' standing in for
            # links, indexed terms and unrecognized tags.
            if isinstance(tree, six.string_types):
                return tree
            variables = [scan_tree(child) for child in tree.children]
            if tree.tag == 'link':
                # The second child of a link holds the target identifier.
                identifier = variables[1]
                if not self.check_xref(identifier, container):
                    self._failed_xrefs.append(
                        describe_xref(identifier, container) )
                return '?'
            elif tree.tag == 'indexed':
                return '?'
            elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                              'name', 'target', 'html', 'para'):
                return ''.join(variables)
            return '?'

        scan_tree(parsed_docstring._tree)


def find_bad_doc_cross_references(names):
    """
    Report the docstring cross references that do not resolve.

    :type names:
      Sequence of module names or filenames.
    :return:
      Sequence of ``(module, linenos, container_name, identifier)`` tuples.
    """
    return XrefScanner(names).scan()