# pyflyby/_modules.py.
# Copyright (C) 2011, 2012, 2013, 2014, 2015 Karl Chen.
# License: MIT http://opensource.org/licenses/MIT

from __future__ import (absolute_import, division, print_function,
                        with_statement)

from   functools                import total_ordering
import os
import re
import six
from   six                      import reraise
import sys
import types

from   pyflyby._file            import FileText, Filename
from   pyflyby._idents          import DottedIdentifier, is_identifier
from   pyflyby._log             import logger
from   pyflyby._util            import (ExcludeImplicitCwdFromPathCtx,
                                        cached_attribute, cmp, memoize,
                                        prefixes)


class ErrorDuringImportError(ImportError):
    """
    The module was found, but importing it failed.

    ``import_module`` raises this when an exception escapes while the target
    module is being imported.  The underlying exception may itself be an
    ``ImportError`` -- for instance when the target module in turn tries to
    import some other module that does not exist.
    """


@memoize
def import_module(module_name):
    """
    Import the module named ``module_name`` and return it.

    :param module_name:
      Name of the module to import.  It is converted with ``str()`` first.
    :return:
      Whatever ``__import__(module_name, fromlist=['dummy'])`` returns.
    :raise ImportError:
      Re-raised as-is when the heuristics below all indicate that the module
      itself does not exist.
    :raise ErrorDuringImportError:
      Any other ``Exception`` (including an ``ImportError`` that does not
      pass the heuristics) raised while importing.
    """
    module_name = str(module_name)
    logger.debug("Importing %r", module_name)
    try:
        result = __import__(module_name, fromlist=['dummy'])
        if result.__name__ != module_name:
            logger.debug("Note: import_module(%r).__name__ == %r",
                         module_name, result.__name__)
        return result
    except ImportError as e:
        # We got an ImportError.  Figure out whether this is due to the module
        # not existing, or whether the module exists but caused an ImportError
        # (perhaps due to trying to import another problematic module).
        # Do this by looking at the exception traceback.  If the previous
        # frame in the traceback is this function (because locals match), then
        # it should be the internal import machinery reporting that the module
        # doesn't exist.  Re-raise the exception as-is.
        # If some sys.meta_path or other import hook isn't compatible with
        # such a check, here are some things we could do:
        #   - Use pkgutil.find_loader() after the fact to check if the module
        #     is supposed to exist.  Note that we shouldn't rely solely on
        #     this before attempting to import, because find_loader() doesn't
        #     work with meta_path.
        #   - Write a memoized global function that compares in the current
        #     environment the difference between attempting to import a
        #     non-existent module vs a problematic module, and returns a
        #     function that uses the working discriminators.
        real_importerror1 = type(e) is ImportError
        real_importerror2 = (sys.exc_info()[2].tb_frame.f_locals is locals())
        m = re.match("^No module named (.*)$", str(e))
        # Parenthesize the name comparison so that a non-matching message
        # (m is None) yields False instead of dereferencing None.
        real_importerror3 = bool(
            m and (m.group(1) == module_name
                   or module_name.endswith("."+m.group(1))))
        logger.debug("import_module(%r): real ImportError: %s %s %s",
                     module_name,
                     real_importerror1, real_importerror2, real_importerror3)
        if real_importerror1 and real_importerror2 and real_importerror3:
            raise
        # Pass the exception class and the instance separately: when the
        # value argument is None, six's Python 3 reraise() calls ``tp()``,
        # which fails if ``tp`` is already an instance.
        reraise(ErrorDuringImportError,
                ErrorDuringImportError(
                    "Error while attempting to import %s: %s: %s"
                    % (module_name, type(e).__name__, e)),
                sys.exc_info()[2])
    except Exception as e:
        reraise(ErrorDuringImportError,
                ErrorDuringImportError(
                    "Error while attempting to import %s: %s: %s"
                    % (module_name, type(e).__name__, e)),
                sys.exc_info()[2])


def _my_iter_modules(path, prefix=''):
    """
    Yield ``(prefix + name, ispkg)`` for each module or package directly
    inside the directory ``path``.

    Modified version of pkgutil.ImpImporter.iter_modules(), patched to
    handle inaccessible subdirectories.

    :param path:
      Directory to scan.  ``None`` or an unlistable directory yields nothing.
    :param prefix:
      String prepended to every yielded module name.
    :return:
      Generator of ``(name, ispkg)`` tuples, where ``ispkg`` is ``True`` for
      a subdirectory containing an ``__init__`` module.
    """
    if path is None:
        return
    try:
        filenames = os.listdir(path)
    except OSError:
        return # silently ignore inaccessible paths
    filenames.sort()  # handle packages before same-named modules
    yielded = set()
    import inspect
    for fn in filenames:
        modname = inspect.getmodulename(fn)
        if modname=='__init__' or modname in yielded:
            continue
        subpath = os.path.join(path, fn)
        ispkg = False
        # Only an actual subdirectory can be a package, so test the entry
        # itself (``subpath``) rather than the directory being scanned.
        if not modname and os.path.isdir(subpath) and '.' not in fn:
            modname = fn
            try:
                entries = os.listdir(subpath)
            except OSError:
                continue # silently ignore inaccessible subdirectories
            ispkg = any(inspect.getmodulename(entry) == '__init__'
                        for entry in entries)
            if not ispkg:
                continue    # not a package
        if modname and '.' not in modname:
            yielded.add(modname)
            yield prefix + modname, ispkg


def pyc_to_py(filename):
    """
    Map a compiled-bytecode filename to the matching source filename.

    A name ending in ``.pyc`` or ``.pyo`` has its final character dropped
    (giving ``.py``); any other name is returned unchanged.
    """
    is_bytecode = filename.endswith((".pyc", ".pyo"))
    return filename[:-1] if is_bytecode else filename


@total_ordering
class ModuleHandle(object):
    """
    A handle to a module.

    A handle is identified by ``self.name``, a `DottedIdentifier`.  Handles
    built from a module name are stored in ``_cls_cache``, so constructing a
    handle twice for the same name returns the same object.
    """

    def __new__(cls, arg):
        """
        Build (or look up) a handle from ``arg``.

        :param arg:
          An existing ``ModuleHandle`` (returned as-is), a `Filename`, a
          module name (string or `DottedIdentifier`), or a module object.
        :raise TypeError:
          ``arg`` is of none of the above types.
        """
        if isinstance(arg, cls):
            return arg
        if isinstance(arg, Filename):
            return cls._from_filename(arg)
        if isinstance(arg, (six.string_types, DottedIdentifier)):
            return cls._from_modulename(arg)
        if isinstance(arg, types.ModuleType):
            return cls._from_module(arg)
        raise TypeError("ModuleHandle: unexpected %s" % (type(arg).__name__,))

    # Maps DottedIdentifier -> ModuleHandle; filled by _from_modulename().
    _cls_cache = {}

    @classmethod
    def _from_modulename(cls, modulename):
        """
        Return the handle for ``modulename``, creating and caching it in
        ``_cls_cache`` on first use.
        """
        modulename = DottedIdentifier(modulename)
        try:
            return cls._cls_cache[modulename]
        except KeyError:
            pass
        self = object.__new__(cls)
        self.name = modulename
        cls._cls_cache[modulename] = self
        return self

    @classmethod
    def _from_module(cls, module):
        """
        Return the handle for an already-imported module object.

        :raise TypeError:
          ``module`` is not a ``types.ModuleType``.
        """
        if not isinstance(module, types.ModuleType):
            raise TypeError
        self = cls._from_modulename(module.__name__)
        assert self.module is module
        return self

    @classmethod
    def _from_filename(cls, filename):
        """
        Not implemented yet: always raises ``NotImplementedError``.
        """
        filename = Filename(filename)
        raise NotImplementedError(
            "TODO: look at sys.path to guess module name")

    @cached_attribute
    def parent(self):
        """
        Handle for ``self.name.parent``, or ``None`` if that is false-y.
        """
        if not self.name.parent:
            return None
        return ModuleHandle(self.name.parent)

    @cached_attribute
    def ancestors(self):
        """
        Tuple of handles, one for each entry of ``self.name.prefixes``.
        """
        return tuple(ModuleHandle(m) for m in self.name.prefixes)

    @cached_attribute
    def module(self):
        """
        Return the module instance.

        :rtype:
          ``types.ModuleType``
        :raise ErrorDuringImportError:
          The module should exist but an error occurred while attempting to
          import it.
        :raise ImportError:
          The module doesn't exist.
        """
        # First check if prefix component is importable.
        if self.parent:
            self.parent.module
        # Import.
        return import_module(self.name)

    @cached_attribute
    def exists(self):
        """
        Return whether the module exists, according to pkgutil.
        Note that this doesn't work for things that are only known by using
        sys.meta_path.

        :rtype:
          ``bool``
        """
        name = str(self.name)
        if name in sys.modules:
            return True
        if self.parent and not self.parent.exists:
            return False
        import pkgutil
        # NOTE(review): pkgutil.find_loader() is deprecated in recent Python
        # releases; importlib.util.find_spec() is the documented replacement.
        try:
            loader = pkgutil.find_loader(name)
        except Exception:
            # Catch all exceptions, not just ImportError.  If the __init__.py
            # for the parent package of the module raises an exception, it'll
            # propagate to here.
            loader = None
        return loader is not None

    @cached_attribute
    def filename(self):
        """
        Return the filename, if appropriate.

        The module itself will not be imported, but if the module is not a
        top-level module/package, accessing this attribute may cause the
        parent package to be imported.

        :rtype:
          `Filename`
        :return:
          The source filename, or ``None`` if no loader is found, the loader
          cannot report a filename, or the reported filename is empty.
        """
        # Use the loader mechanism to find the filename.  We do so instead of
        # using self.module.__file__, because the latter forces importing a
        # module, which may be undesirable.
        import pkgutil
        try:
            loader = pkgutil.get_loader(str(self.name))
        except ImportError:
            return None
        if not loader:
            return None
        # Get the filename using loader.get_filename().  Note that this does
        # more than just loader.filename: for example, it adds /__init__.py
        # for packages.
        # Not every loader provides get_filename() (e.g. the loader for
        # built-in modules); such modules have no filename.
        get_filename = getattr(loader, "get_filename", None)
        if get_filename is None:
            return None
        filename = get_filename()
        if not filename:
            return None
        return Filename(pyc_to_py(filename))

    @cached_attribute
    def text(self):
        """
        ``FileText`` built from ``self.filename``.
        """
        return FileText(self.filename)

    def __text__(self):
        return self.text

    @cached_attribute
    def block(self):
        """
        ``PythonBlock`` built from ``self.text``.
        """
        from pyflyby._parse import PythonBlock
        return PythonBlock(self.text)

    @staticmethod
    @memoize
    def list():
        """
        Enumerate all top-level packages/modules.

        :rtype:
          ``tuple`` of `ModuleHandle` s
        """
        import pkgutil
        # Get the list of top-level packages/modules using pkgutil.
        # We exclude "." from sys.path while doing so.  Python includes "." in
        # sys.path by default, but this is undesirable for autoimporting.  If
        # we autoimported random python scripts in the current directory, we
        # could accidentally execute code with side effects.  If the current
        # working directory is /tmp, trying to enumerate modules there also
        # causes problems, because there are typically directories there not
        # readable by the current user.
        with ExcludeImplicitCwdFromPathCtx():
            modlist = pkgutil.iter_modules(None)
            module_names = [t[1] for t in modlist]
        # pkgutil includes all *.py even if the name isn't a legal python
        # module name, e.g. if a directory in $PYTHONPATH has files named
        # "try.py" or "123.py", pkgutil will return entries named "try" or
        # "123".  Filter those out.
        module_names = [m for m in module_names if is_identifier(m)]
        # Canonicalize.
        return tuple(ModuleHandle(m) for m in sorted(set(module_names)))

    @cached_attribute
    def submodules(self):
        """
        Enumerate the importable submodules of this module.

          >>> ModuleHandle("email").submodules      # doctest:+ELLIPSIS
          (..., 'email.encoders', ..., 'email.mime', ...)

        Accessing this attribute imports the module.  A module without a
        ``__path__`` attribute has no submodules.

        :rtype:
          ``tuple`` of `ModuleHandle` s
        """
        import pkgutil
        module = self.module
        try:
            path = module.__path__
        except AttributeError:
            return ()
        # Enumerate the modules at a given path.  Prefer to use ``pkgutil`` if
        # we can.  However, if it fails due to OSError, use our own version
        # which is robust to that.
        try:
            submodule_names = [t[1] for t in pkgutil.iter_modules(path)]
        except OSError:
            submodule_names = [t[0] for p in path for t in _my_iter_modules(p)]
        return tuple(ModuleHandle("%s.%s" % (self.name,m))
                     for m in sorted(set(submodule_names)))

    @cached_attribute
    def exports(self):
        """
        Get symbols exported by this module.

        Note that this involves actually importing this module, which may
        have side effects.  (TODO: rewrite to avoid this?)

        If the module defines ``__all__``, that list is used.  Otherwise the
        non-private names from ``dir(module)`` are used, restricted to
        objects whose ``__module__`` starts with this module's name.

        :rtype:
          `ImportSet` or ``None``
        :return:
          Exports, or ``None`` if nothing exported.
        :raise Exception:
          ``__all__`` contains an entry that is not a ``str``.
        """
        from pyflyby._importclns import ImportStatement, ImportSet
        module = self.module
        try:
            members = module.__all__
        except AttributeError:
            members = dir(module)
            # Filter by non-private.
            members = [n for n in members if not n.startswith("_")]
            # Filter by definition in the module.
            def from_this_module(name):
                # TODO: could do this more robustly by parsing the AST and
                # looking for STOREs (definitions/assignments/etc).
                x = getattr(module, name)
                m = getattr(x, "__module__", None)
                if not m:
                    return False
                return DottedIdentifier(m).startswith(self.name)
            members = [n for n in members if from_this_module(n)]
        else:
            if not all(isinstance(s, str) for s in members):
                raise Exception(
                    "Module %r contains non-string entries in __all__"
                    % (str(self.name),))
        # Filter out artificially added "deep" members.
        members = [n for n in members if "." not in n]
        if not members:
            return None
        return ImportSet(
            [ ImportStatement.from_parts(str(self.name), members) ])

    def __str__(self):
        return str(self.name)

    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, str(self.name))

    def __hash__(self):
        return hash(self.name)

    def __cmp__(self, o):
        if self is o:
            return 0
        if not isinstance(o, ModuleHandle):
            return NotImplemented
        return cmp(self.name, o.name)

    def __eq__(self, o):
        if self is o:
            return True
        if not isinstance(o, ModuleHandle):
            return NotImplemented
        return self.name == o.name

    def __ne__(self, other):
        return not (self == other)

    # The rest are defined by total_ordering
    def __lt__(self, o):
        if not isinstance(o, ModuleHandle):
            return NotImplemented
        return self.name < o.name

    def __getitem__(self, x):
        """
        Slice the handle's name and return a handle for the result.

        :raise TypeError:
          ``x`` is not a ``slice``.
        """
        if isinstance(x, slice):
            return type(self)(self.name[x])
        raise TypeError

    @classmethod
    def containing(cls, identifier):
        """
        Try to find the module that defines a name such as ``a.b.c`` by trying
        to import ``a``, ``a.b``, and ``a.b.c``.

        :return:
          The handle of the 'deepest' module (most commonly it would be
          ``a.b`` in this example).
        :rtype:
          `ModuleHandle`
        :raise ImportError:
          The first component, or a later component that is not reachable as
          an attribute, could not be imported.
        """
        # In the code below we catch "Exception" rather than just ImportError
        # or AttributeError since importing and __getattr__ing can raise other
        # exceptions.
        identifier = DottedIdentifier(identifier)
        try:
            module = ModuleHandle(identifier[:1])
            result = module.module
        except Exception as e:
            raise ImportError(e)
        # zip() returns an iterator on Python 3, so materialize it before
        # dropping the first pair (already handled above).
        pairs = tuple(zip(identifier, prefixes(identifier)))[1:]
        for part, prefix in pairs:
            try:
                # Prefer plain attribute access on what we have so far.
                result = getattr(result, str(part))
            except Exception:
                # Not available as an attribute; try importing the prefix.
                try:
                    module = cls(prefix)
                    result = module.module
                except Exception as e:
                    raise ImportError(e)
            else:
                if isinstance(result, types.ModuleType):
                    module = cls(result)
        logger.debug("Imported %r to get %r", module, identifier)
        return module