# pyflyby/_file.py.
# Copyright (C) 2011, 2012, 2013, 2014, 2015, 2018 Karl Chen.
# License: MIT http://opensource.org/licenses/MIT

from __future__ import (absolute_import, division, print_function,
                        with_statement)

from   functools                import total_ordering
import io
import os
import re
import six
import sys

from   six                      import string_types

from   pyflyby._util            import cached_attribute, cmp, memoize

class UnsafeFilenameError(ValueError):
    """
    Raised when a filename is empty or contains characters that are not
    considered safe.
    """


# TODO: statcache

@total_ordering
class Filename(object):
    """
    A filename.

    The path is stored in absolute form (``os.path.abspath`` is applied on
    construction).  Empty names, names containing characters outside a
    conservative whitelist, and names with a path component starting with
    ``~`` are rejected with `UnsafeFilenameError`.

      >>> Filename('/etc/passwd')
      Filename('/etc/passwd')

    """
    def __new__(cls, arg):
        """
        Return a ``Filename`` for ``arg``.

        :type arg:
          ``Filename`` or ``str``
        :param arg:
          An existing instance (returned unchanged) or a path string.
        :raise TypeError:
          If ``arg`` is neither a ``Filename`` nor a string.
        :raise UnsafeFilenameError:
          If ``arg`` is a string that fails the safety checks.
        """
        if isinstance(arg, cls):
            return arg
        if isinstance(arg, six.string_types):
            return cls._from_filename(arg)
        raise TypeError("%s: unexpected %s"
                        % (cls.__name__, type(arg).__name__))

    @classmethod
    def _from_filename(cls, filename):
        """
        Validate the string ``filename`` and build an instance from it.

        :type filename:
          ``str``
        :raise UnsafeFilenameError:
          If ``filename`` is empty, contains a character outside the allowed
          set, or has a path component beginning with ``~``.
        :rtype:
          `Filename`
        """
        if not isinstance(filename, six.string_types):
            raise TypeError("%s: unexpected %s"
                            % (cls.__name__, type(filename).__name__))
        filename = str(filename)
        if not filename:
            raise UnsafeFilenameError("(empty string)")
        # Whitelist of allowed characters; anything else is rejected.
        if re.search("[^a-zA-Z0-9_=+{}/.,~@-]", filename):
            raise UnsafeFilenameError(filename)
        # "~" is allowed inside a component but not as its first character.
        if re.search("(^|/)~", filename):
            raise UnsafeFilenameError(filename)
        self = object.__new__(cls)
        self._filename = os.path.abspath(filename)
        return self

    def __str__(self):
        return self._filename

    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, self._filename)

    def __truediv__(self, x):
        """
        Join ``x`` onto this path: ``Filename('/a') / 'b'`` is
        ``Filename('/a/b')``.
        """
        return type(self)(os.path.join(self._filename, x))

    def __hash__(self):
        return hash(self._filename)

    def __eq__(self, o):
        if self is o:
            return True
        if not isinstance(o, Filename):
            return NotImplemented
        return self._filename == o._filename

    def __ne__(self, other):
        return not (self == other)

    # The rest are defined by total_ordering
    def __lt__(self, o):
        if not isinstance(o, Filename):
            return NotImplemented
        return self._filename < o._filename

    def __cmp__(self, o):
        if self is o:
            return 0
        if not isinstance(o, Filename):
            return NotImplemented
        return cmp(self._filename, o._filename)

    @cached_attribute
    def ext(self):
        """
        Returns the extension of this filename, including the dot.
        Returns ``None`` if no extension.

        Only the final path component is examined, so a dot in a parent
        directory name is never mistaken for an extension.

        :rtype:
          ``str`` or ``None``
        """
        # Partition the basename rather than the whole path: for a path like
        # "/a.d/file" the full-path partition would yield ".d/file".
        _, dot, suffix = os.path.basename(self._filename).rpartition('.')
        if not dot:
            return None
        return dot + suffix

    @cached_attribute
    def base(self):
        """
        The final path component, as a plain ``str``.
        """
        return os.path.basename(self._filename)

    @cached_attribute
    def dir(self):
        """
        The containing directory.

        :rtype:
          `Filename`
        """
        return type(self)(os.path.dirname(self._filename))

    @cached_attribute
    def real(self):
        """
        This path passed through ``os.path.realpath``.  The result is cached
        on first access.

        :rtype:
          `Filename`
        """
        return type(self)(os.path.realpath(self._filename))

    @property
    def realpath(self):
        """
        This path passed through ``os.path.realpath``.  Unlike `real`, this
        is recomputed on every access.

        :rtype:
          `Filename`
        """
        return type(self)(os.path.realpath(self._filename))

    # The following properties query the filesystem on every access.

    @property
    def exists(self):
        return os.path.exists(self._filename)

    @property
    def islink(self):
        return os.path.islink(self._filename)

    @property
    def isdir(self):
        return os.path.isdir(self._filename)

    @property
    def isfile(self):
        return os.path.isfile(self._filename)

    @property
    def isreadable(self):
        return os.access(self._filename, os.R_OK)

    @property
    def iswritable(self):
        return os.access(self._filename, os.W_OK)

    @property
    def isexecutable(self):
        return os.access(self._filename, os.X_OK)

    def startswith(self, prefix):
        """
        Return whether this path equals ``prefix`` or lies underneath it.

        The comparison is by whole path components: ``/aa/bb`` starts with
        ``/aa`` but ``/aab`` does not.

        :type prefix:
          `Filename` or ``str``
        :rtype:
          ``bool``
        """
        prefix = Filename(prefix)
        if self == prefix:
            return True
        prefix_str = prefix._filename
        # A prefix that already ends with a slash (the root directory) must
        # not get a second one: no absolute path starts with "//", so the
        # root would otherwise never be recognized as a prefix.
        if not prefix_str.endswith("/"):
            prefix_str += "/"
        return self._filename.startswith(prefix_str)

    def list(self, ignore_unsafe=True):
        """
        List the entries of this directory, sorted by name.

        :type ignore_unsafe:
          ``bool``
        :param ignore_unsafe:
          If true, entries whose names are rejected by ``Filename`` are
          skipped; otherwise the `UnsafeFilenameError` propagates.
        :rtype:
          ``list`` of `Filename` s
        """
        filenames = [os.path.join(self._filename, f)
                     for f in sorted(os.listdir(self._filename))]
        result = []
        for f in filenames:
            try:
                f = Filename(f)
            except UnsafeFilenameError:
                if ignore_unsafe:
                    continue
                else:
                    raise
            result.append(f)
        return result

    @property
    def ancestors(self):
        """
        Return ancestors of self, from self to /.

          >>> Filename("/aa/bb").ancestors
          (Filename('/aa/bb'), Filename('/aa'), Filename('/'))

        :rtype:
          ``tuple`` of ``Filename`` s
        """
        result = [self]
        while True:
            parent = result[-1].dir
            # The walk ends at the path that is its own parent.
            if parent == result[-1]:
                break
            result.append(parent)
        return tuple(result)


@memoize
def _get_PATH():
    """
    Return the directories listed in ``$PATH``.

    Empty entries and entries rejected by `Filename` as unsafe are dropped.

    :rtype:
      ``tuple`` of `Filename` s
    """
    directories = []
    for entry in os.environ.get("PATH", "").split(os.pathsep):
        if entry:
            try:
                directories.append(Filename(entry))
            except UnsafeFilenameError:
                # Skip directories we refuse to represent.
                pass
    return tuple(directories)


def which(program):
    """
    Locate ``program``.

    ``program`` is first tried as a path in its own right (``Filename``
    resolves relative names to absolute ones); if that path is readable it is
    returned.  Otherwise each directory on $PATH is searched for an
    executable entry named ``program``.

    :type program:
      ``str``
    :rtype:
      `Filename`
    :return:
      The located program, or ``None`` if not found.
    """
    # Direct hit: the argument itself names a readable file.
    direct = Filename(program)
    if direct.isreadable:
        return direct
    # Otherwise take the first executable match along $PATH.
    for directory in _get_PATH():
        located = directory / program
        if located.isexecutable:
            return located
    return None


# Well-known name for standard input.  ``read_file`` reads from ``sys.stdin``
# instead of opening a file when given this filename.
Filename.STDIN = Filename("/dev/stdin")

@total_ordering
class FilePos(object):
    """
    A (lineno, colno) position within a `FileText`.
    Both lineno and colno are 1-indexed.
    """

    def __new__(cls, *args):
        """
        Construct (or reuse) a ``FilePos``.

        Accepted call forms::

            FilePos()                  # position (1,1)
            FilePos(None)              # position (1,1)
            FilePos(filepos)           # returns ``filepos`` itself
            FilePos((lineno, colno))
            FilePos(lineno, colno)

        :raise TypeError:
          If the arguments are not one of the forms above, or lineno/colno
          are not both exactly ``int``.
        :raise ValueError:
          If lineno or colno is less than 1.
        :rtype:
          `FilePos`
        """
        if len(args) == 0:
            return cls._ONE_ONE
        if len(args) == 1:
            arg, = args
            if isinstance(arg, cls):
                return arg
            elif arg is None:
                return cls._ONE_ONE
            elif isinstance(arg, tuple):
                args = arg
                # Fall through
            else:
                raise TypeError
        lineno, colno = cls._intint(args)
        if lineno == colno == 1:
            return cls._ONE_ONE # space optimization
        if lineno < 1:
            raise ValueError(
                "FilePos: invalid lineno=%d; should be >= 1" % lineno,)
        if colno < 1:
            raise ValueError(
                "FilePos: invalid colno=%d; should be >= 1" % colno,)
        return cls._from_lc(lineno, colno)

    @staticmethod
    def _intint(args):
        """
        Check that ``args`` is a 2-tuple of ``int`` and return it unchanged.

        The check is on exact types, so subclasses of ``tuple`` or ``int``
        (including ``bool``) are rejected.

        :raise TypeError:
          If ``args`` is anything else.
        """
        if (type(args) is tuple and
            len(args) == 2 and
            type(args[0]) is type(args[1]) is int):
            return args
        else:
            raise TypeError("Expected (int,int); got %r" % (args,))

    @classmethod
    def _from_lc(cls, lineno, colno):
        """
        Build an instance directly from ``lineno`` and ``colno``, without any
        validation or reuse of the shared (1,1) instance.
        """
        self = object.__new__(cls)
        self.lineno = lineno
        self.colno  = colno
        return self

    def __add__(self, delta):
        '''
        "Add" a coordinate (line,col) delta to this ``FilePos``.

        Note that addition here may be non-obvious.  If there is any line
        movement, then the existing column number is ignored, and the new
        column is the new column delta + 1 (to convert into 1-based numbers).

        :type delta:
          ``(int, int)``
        :param delta:
          Non-negative (line, column) offsets.
        :rtype:
          `FilePos`
        '''
        ldelta, cdelta = self._intint(delta)
        assert ldelta >= 0 and cdelta >= 0
        if ldelta == 0:
            return FilePos(self.lineno, self.colno + cdelta)
        else:
            return FilePos(self.lineno + ldelta, 1 + cdelta)

    def __str__(self):
        return "(%d,%d)" % (self.lineno, self.colno)

    def __repr__(self):
        return "FilePos%s" % (self,)

    @property
    def _data(self):
        # Tuple form used for equality, ordering and hashing.
        return (self.lineno, self.colno)

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, FilePos):
            return NotImplemented
        return self._data == other._data

    def __ne__(self, other):
        return not (self == other)

    def __cmp__(self, other):
        if self is other:
            return 0
        if not isinstance(other, FilePos):
            return NotImplemented
        return cmp(self._data, other._data)

    # The rest are defined by total_ordering
    def __lt__(self, other):
        # An object is never less than itself.  Return a real bool here so
        # that ``pos < pos`` evaluates to ``False`` rather than ``0``.
        if self is other:
            return False
        if not isinstance(other, FilePos):
            return NotImplemented
        return self._data < other._data

    def __hash__(self):
        return hash(self._data)


# Shared instance for position (1,1).  ``FilePos.__new__`` returns this object
# for ``FilePos()``, ``FilePos(None)`` and ``FilePos(1, 1)``.
FilePos._ONE_ONE = FilePos._from_lc(1, 1)


@total_ordering
class FileText(object):
    """
    Represents a contiguous sequence of lines from a file.

    The text is available in two forms, ``joined`` (a single string) and
    ``lines`` (a tuple of strings without newlines).  Whichever form was not
    supplied at construction time is derived from the other on first access.
    """

    def __new__(cls, arg, filename=None, startpos=None):
        """
        Return a new ``FileText`` instance.

        :type arg:
          ``FileText``, ``Filename``, ``str``, or an object with a
          ``__text__()`` method
        :param arg:
          The text, or something that can be converted into it: an existing
          ``FileText`` (returned as-is, or re-labelled via `alter` if
          ``filename``/``startpos`` are given), a ``Filename`` (the file is
          read), or an object whose ``__text__()`` result is itself an
          acceptable ``arg``.
        :type filename:
          `Filename`
        :param filename:
          Filename to attach to this ``FileText``, if not already given by
          ``arg``.
        :type startpos:
          ``FilePos``
        :param startpos:
          Starting file position (lineno & colno) of this ``FileText``, if not
          already given by ``arg``.
        :raise TypeError:
          If ``arg`` is of none of the supported types.
        :rtype:
          ``FileText``
        """
        if isinstance(arg, cls):
            if filename is startpos is None:
                return arg
            return arg.alter(filename=filename, startpos=startpos)
        elif isinstance(arg, Filename):
            return cls(read_file(arg), filename=filename, startpos=startpos)
        elif hasattr(arg, "__text__"):
            return FileText(arg.__text__(), filename=filename, startpos=startpos)
        elif isinstance(arg, six.string_types):
            self = object.__new__(cls)
            self.joined = arg
        else:
            raise TypeError("%s: unexpected %s"
                            % (cls.__name__, type(arg).__name__))
        if filename is not None:
            filename = Filename(filename)
        startpos = FilePos(startpos)
        self.filename = filename
        self.startpos = startpos
        return self

    @classmethod
    def _from_lines(cls, lines, filename, startpos):
        """
        Build an instance directly from already-split ``lines``.

        :type lines:
          non-empty ``tuple`` of ``str``
        :param lines:
          Lines without newline characters, in the format described by
          `lines`.
        """
        assert type(lines) is tuple
        assert len(lines) > 0
        assert isinstance(lines[0], string_types)
        assert not lines[-1].endswith("\n")
        self = object.__new__(cls)
        self.lines    = lines
        self.filename = filename
        self.startpos = startpos
        return self

    @cached_attribute
    def lines(self):
        r"""
        Lines that have been split by newline.

        These strings do NOT contain '\n'.

        If the input file ended in '\n', then the last item will be the empty
        string.  This is to avoid having to check lines[-1].endswith('\n')
        everywhere.

        :rtype:
          ``tuple`` of ``str``
        """
        # Used if only initialized with 'joined'.
        # We use str.split() instead of str.splitlines() because the latter
        # doesn't distinguish between strings that end in newline or not
        # (or requires extra work to process if we use splitlines(True)).
        return tuple(self.joined.split('\n'))

    @cached_attribute
    def joined(self): # used if only initialized with 'lines'
        """
        The whole text as a single string.

        :rtype:
          ``str``
        """
        return '\n'.join(self.lines)

    @classmethod
    def from_filename(cls, filename):
        """
        Read ``filename`` and return its contents as a ``FileText``.

        :type filename:
          `Filename` or ``str``
        :rtype:
          ``FileText``
        """
        # The constructor reads the file when handed a Filename.  (This used
        # to call a nonexistent ``from_lines`` and so always raised
        # AttributeError.)
        return cls(Filename(filename))

    def alter(self, filename=None, startpos=None):
        """
        Return a ``FileText`` with the same text but a different ``filename``
        and/or ``startpos``.

        Arguments left as ``None`` keep their current value.  If nothing
        actually changes, ``self`` is returned.

        :rtype:
          ``FileText``
        """
        if filename is not None:
            filename = Filename(filename)
        else:
            filename = self.filename
        if startpos is not None:
            startpos = FilePos(startpos)
        else:
            startpos = self.startpos
        if filename == self.filename and startpos == self.startpos:
            return self
        else:
            result = object.__new__(type(self))
            result.lines    = self.lines
            result.joined   = self.joined
            result.filename = filename
            result.startpos = startpos
            return result

    @cached_attribute
    def endpos(self):
        """
        The position after the last character in the text.

        :rtype:
          ``FilePos``
        """
        startpos = self.startpos
        lines    = self.lines
        lineno   = startpos.lineno + len(lines) - 1
        if len(lines) == 1:
            # Still on the starting line, so columns continue from startpos.
            colno = startpos.colno + len(lines[-1])
        else:
            colno = 1 + len(lines[-1])
        return FilePos(lineno, colno)

    def _lineno_to_index(self, lineno):
        """
        Convert a line number into an index into ``self.lines``.

        :raise IndexError:
          If ``lineno`` does not refer to a line of this text.
        """
        lineindex = lineno - self.startpos.lineno
        # Check that the lineindex is in range.  We don't allow pointing at
        # the line after the last line because we already ensured that
        # self.lines contains an extra empty string if necessary, to indicate
        # a trailing newline in the file.
        # NOTE(review): the message prints the upper bound as exclusive, but
        # endpos.lineno itself passes this check; confirm the intended
        # wording before changing it.
        if not 0 <= lineindex < len(self.lines):
            raise IndexError(
                "Line number %d out of range [%d, %d)"
                % (lineno, self.startpos.lineno, self.endpos.lineno))
        return lineindex

    def _colno_to_index(self, lineindex, colno):
        """
        Convert a column number on line ``lineindex`` into an index into that
        line's string.

        :raise IndexError:
          If ``colno`` lies outside the line.
        """
        # Only the first line can start at a column other than 1.
        coloffset = self.startpos.colno if lineindex == 0 else 1
        colindex = colno - coloffset
        line = self.lines[lineindex]
        # Check that the colindex is in range.  We do allow pointing at the
        # character after the last (non-newline) character in the line.
        if not 0 <= colindex <= len(line):
            raise IndexError(
                "Column number %d on line %d out of range [%d, %d]"
                % (colno, lineindex+self.startpos.lineno,
                   coloffset, coloffset+len(line)))
        return colindex

    def __getitem__(self, arg):
        """
        Return the line(s) with the given line number(s).
        If slicing, returns an instance of ``FileText``.

        Note that line numbers are indexed based on ``self.startpos.lineno``
        (which is 1 at the start of the file).

          >>> FileText("a\\nb\\nc\\nd")[2]
          'b'

          >>> FileText("a\\nb\\nc\\nd")[2:4]
          FileText('b\\nc\\n', startpos=(2,1))

          >>> FileText("a\\nb\\nc\\nd")[3:]
          FileText('c\\nd', startpos=(3,1))

          >>> FileText("a\\nb\\nc\\nd")[0]
          Traceback (most recent call last):
          ...
          IndexError: Line number 0 out of range [1, 4)

        When slicing, the input arguments can also be given as ``FilePos``
        arguments or (lineno,colno) tuples.  These are 1-indexed at the start
        of the file.

          >>> FileText("a\\nb\\nc\\nd")[(2,2):4]
          FileText('\\nc\\n', startpos=(2,2))

        :raise IndexError:
          If a line or column number is out of range.
        :raise ValueError:
          If a slice step other than 1 is given.
        :raise TypeError:
          If ``arg`` is neither an ``int`` nor a slice.
        :rtype:
          ``str`` or `FileText`
        """
        L = self._lineno_to_index
        C = self._colno_to_index
        if isinstance(arg, slice):
            if arg.step is not None and arg.step != 1:
                raise ValueError("steps not supported")
            # Interpret start (lineno,colno) into indexes.
            if arg.start is None:
                start_lineindex = 0
                start_colindex = 0
            elif isinstance(arg.start, int):
                start_lineindex = L(arg.start)
                start_colindex = 0
            else:
                startpos = FilePos(arg.start)
                start_lineindex = L(startpos.lineno)
                start_colindex = C(start_lineindex, startpos.colno)
            # Interpret stop (lineno,colno) into indexes.
            if arg.stop is None:
                # An open-ended slice stops at the end of the last line.
                # stop_lineindex is the index of the line that gets clipped
                # at stop_colindex, so it must be the last valid index.
                stop_lineindex = len(self.lines) - 1
                stop_colindex = len(self.lines[-1])
            elif isinstance(arg.stop, int):
                stop_lineindex = L(arg.stop)
                stop_colindex = 0
            else:
                stoppos = FilePos(arg.stop)
                stop_lineindex = L(stoppos.lineno)
                stop_colindex = C(stop_lineindex, stoppos.colno)
            # {start,stop}_{lineindex,colindex} are now 0-indexed
            # [open,closed) ranges.
            assert 0 <= start_lineindex <= stop_lineindex < len(self.lines)
            assert 0 <= start_colindex <= len(self.lines[start_lineindex])
            assert 0 <= stop_colindex <= len(self.lines[stop_lineindex])
            # Optimization: return entire range
            if (start_lineindex == 0 and
                start_colindex == 0 and
                stop_lineindex == len(self.lines)-1 and
                stop_colindex == len(self.lines[-1])):
                return self
            # Get the lines we care about.  We always include an extra entry
            # at the end which we'll chop to the desired number of characters.
            result_split = list(self.lines[start_lineindex:stop_lineindex+1])
            # Clip the starting and ending strings.  We do the end clip first
            # in case the result has only one line.
            result_split[-1] = result_split[-1][:stop_colindex]
            result_split[0] = result_split[0][start_colindex:]
            # Compute the new starting line and column numbers.
            result_lineno = start_lineindex + self.startpos.lineno
            if start_lineindex == 0:
                result_colno = start_colindex + self.startpos.colno
            else:
                result_colno = start_colindex + 1
            result_startpos = FilePos(result_lineno, result_colno)
            return FileText._from_lines(tuple(result_split),
                                        filename=self.filename,
                                        startpos=result_startpos)
        elif isinstance(arg, int):
            # Return a single line.
            lineindex = L(arg)
            return self.lines[lineindex]
        else:
            raise TypeError("bad type %r" % (type(arg),))

    @classmethod
    def concatenate(cls, args):
        """
        Concatenate a bunch of `FileText` arguments.  Uses the ``filename``
        and ``startpos`` from the first argument.

        The texts are joined directly, with no separator inserted between
        them.

        :rtype:
          `FileText`
        """
        args = [FileText(x) for x in args]
        if len(args) == 1:
            return args[0]
        return FileText(
            ''.join([t.joined for t in args]),
            filename=args[0].filename,
            startpos=args[0].startpos)

    def __repr__(self):
        r = "%s(%r" % (type(self).__name__, self.joined,)
        if self.filename is not None:
            r += ", filename=%r" % (str(self.filename),)
        if self.startpos != FilePos():
            r += ", startpos=%s" % (self.startpos,)
        r += ")"
        return r

    def __str__(self):
        return self.joined

    def __eq__(self, o):
        if self is o:
            return True
        if not isinstance(o, FileText):
            return NotImplemented
        return (self.filename == o.filename and
                self.joined   == o.joined   and
                self.startpos == o.startpos)

    def __ne__(self, other):
        return not (self == other)

    # The rest are defined by total_ordering
    def __lt__(self, o):
        if not isinstance(o, FileText):
            return NotImplemented
        return ((self.filename, self.joined, self.startpos) <
                   (o   .filename, o   .joined, o   .startpos))

    def __cmp__(self, o):
        if self is o:
            return 0
        if not isinstance(o, FileText):
            return NotImplemented
        return cmp((self.filename, self.joined, self.startpos),
                   (o   .filename, o   .joined, o   .startpos))

    def __hash__(self):
        # Cache the hash in a private attribute.  Assigning a replacement
        # ``__hash__`` on the instance has no effect on ``hash(obj)``, since
        # the implicit special-method lookup goes through the type.
        try:
            return self._hash
        except AttributeError:
            h = self._hash = hash((self.filename, self.joined, self.startpos))
            return h


def read_file(filename):
    """
    Read the contents of ``filename``.

    If ``filename`` is ``Filename.STDIN``, the data is read from
    ``sys.stdin``; otherwise the file is opened in text mode.

    :type filename:
      `Filename` or ``str``
    :rtype:
      `FileText`
    :return:
      The text read, labelled with ``filename``.
    """
    filename = Filename(filename)
    if filename != Filename.STDIN:
        with io.open(str(filename), 'r') as stream:
            contents = stream.read()
    else:
        contents = sys.stdin.read()
    return FileText(contents, filename=filename)

def write_file(filename, data):
    """
    Write ``data`` to ``filename``, replacing any existing contents.

    :type filename:
      `Filename` or ``str``
    :type data:
      Anything accepted by the `FileText` constructor
    """
    target = Filename(filename)
    text = FileText(data)
    with open(str(target), 'w') as out:
        out.write(text.joined)

def atomic_write_file(filename, data):
    """
    Write ``data`` to ``filename`` by writing a temporary sibling file and
    renaming it over the destination.

    The temporary file is named ``<filename>.tmp.<pid>``.  If the destination
    already exists, its mode and group are copied onto the temporary file on
    a best-effort basis before the rename.  If writing or renaming fails, the
    temporary file is removed and the error propagates.

    :type filename:
      `Filename` or ``str``
    :type data:
      Anything accepted by the `FileText` constructor
    """
    filename = Filename(filename)
    data = FileText(data)
    temp_filename = Filename("%s.tmp.%s" % (filename, os.getpid(),))
    renamed = False
    try:
        write_file(temp_filename, data)
        try:
            st = os.stat(str(filename)) # OSError if file didn't exist before
            os.chmod(str(temp_filename), st.st_mode)
            os.chown(str(temp_filename), -1, st.st_gid) # OSError if not member of group
        except OSError:
            # Preserving mode/group is best-effort only.
            pass
        os.rename(str(temp_filename), str(filename))
        renamed = True
    finally:
        if not renamed:
            # Don't leave a stray temporary file behind on failure.  A flag
            # plus ``finally`` is used (rather than except/re-raise) so the
            # original exception is what propagates.
            try:
                os.unlink(str(temp_filename))
            except OSError:
                pass

def expand_py_files_from_args(pathnames, on_error=lambda filename: None):
    """
    Expand the given paths into a list of files, descending into directories
    to find ``*.py`` files.

    A path that is a file is always part of the result, whatever its
    extension.  A path that is a directory is searched recursively; within
    it, only ``*.py`` files are collected, and entries whose name starts with
    ``.`` or equals ``__pycache__`` are skipped.

    :type pathnames:
      `Filename`, or ``list``/``tuple`` of `Filename` s
    :type on_error:
      callable
    :param on_error:
      Called with each directly specified path that is neither a file nor a
      directory.  Not called for entries found during traversal.
    :rtype:
      ``list`` of `Filename` s
    """
    if not isinstance(pathnames, (tuple, list)):
        pathnames = [pathnames]
    roots = [Filename(p) for p in pathnames]
    # Work stack of (path, is_file) pairs.  Items are pushed in reverse so
    # that popping from the end visits them in their original order.
    pending = []
    for root in reversed(roots):
        if root.isfile:
            pending.append((root, True))
        elif root.isdir:
            pending.append((root, False))
        else:
            # Only directly specified arguments are reported as problems.
            on_error(root)
    found = []
    while pending:
        current, is_file = pending.pop()
        if is_file:
            found.append(current)
            continue
        for child in reversed(current.list()):
            # These exclusions apply only while recursing: a hidden path
            # named explicitly by the caller is still honored above, but we
            # never descend into one on our own.
            name = child.base
            if name.startswith(".") or name == "__pycache__":
                continue
            if child.isfile:
                if child.ext == ".py":
                    pending.append((child, True))
            elif child.isdir:
                pending.append((child, False))
            # Anything else found during traversal is silently ignored.
    return found