#!/usr/bin/env python
"""
Preparse a .sage file and save the result to a .py file.

AUTHOR:
    -- William Stein (2005): first version
    -- William Stein (2008): fix trac #2391 and document the code.
    -- Dan Drake (2009): fix trac #5052
    -- Dan Drake (2010-12-08): fix trac #10440
"""

import os, sys, re

# Everything after the script name (sys.argv[0]) is a file to preparse;
# this is how the spkg/bin/sage script hands the files to us.
files = sys.argv[1:]

# Without at least one file there is nothing to do: print a usage
# summary and exit with a failure status.
if not files:
    usage_lines = (
        "Usage: %s <file1.sage> <file2.sage>..." % sys.argv[0],
        "Creates files file1.py, file2.py ... that are the Sage",
        "preparsed versions of file1.sage, file2.sage ...",
        "If a non-autogenerated .py file with the same name",
        "exists, you will receive an error and the file will",
        "not be overwritten.",
    )
    for usage_line in usage_lines:
        print(usage_line)
    sys.exit(1)

# Module-scope list of every file name that do_preparse() has been
# asked to handle so far during this run.  do_preparse() returns
# immediately when given a name that is already in this list, so a
# file is never preparsed twice; without this, mutually recursive
# load/attach statements would lead to an infinite loop.
files_so_far = []

# Marker text inserted (followed by the name of the source file) into
# every file this script generates.  It tells a human reader that the
# file was autogenerated, and do_preparse() looks for it in an existing
# .py file to decide whether that file is safe to overwrite.
AUTOGEN_MSG = "# This file was *autogenerated* from the file "

# We use this regexp to recognize lines with load or attach statements.
# It is applied to one line at a time.  Here's what it looks for:
#
# A (possibly empty) sequence of whitespace at the beginning of the
# line, saved as a group named 'lws';
#
#   followed by
#
# the word "load" or "attach" (captured in an unnamed group);
#
#   followed by
#
# a nonempty sequence of whitespace;
#
#   followed by
#
# whatever else is on the line, saved as a group named 'files'.
#
# We want to save the leading white space so that we can maintain
# correct indentation in the preparsed file.
#
# Example: for the line '    load "a.sage", b.py' the group 'lws' is
# four spaces and the group 'files' is '"a.sage", b.py'.
load_or_attach = re.compile(r"^(?P<lws>\s*)(load|attach)\s+(?P<files>.*)$")

def do_preparse(f, files_before=None):
    """
    Preparse the file f and write the result out to a filename
    with extension .py.

    INPUT:
        f -- string: the name of a file
        files_before -- list of strings of previous filenames loaded
                (to avoid circular loops); None (the default) means
                that no file has been loaded before this one.

    OUTPUT:
        writes a file with extension .py to disk.

    NOTES:
        Nothing is written (and the function simply returns) if f was
        already handled during this run, if f does not exist (a message
        is printed), or if f is itself a .py file.

        The script exits with status 1 if f has an extension other
        than .sage/.py, or if the target .py file already exists and
        does not contain the autogenerated marker.
    """
    # Use None rather than a mutable default so that no list object is
    # shared between calls.
    if files_before is None:
        files_before = []

    if f in files_so_far:
        return
    files_so_far.append(f)
    if not os.path.exists(f):
        print("%s: File '%s' is missing" % (sys.argv[0], f))
        return
    if f[-3:] == '.py':
        return
    if f[-5:] != '.sage':
        print("%s: Unknown file type %s" % (sys.argv[0], f))
        sys.exit(1)

    fname = '%s.py' % f[:-5]
    if os.path.exists(fname):
        # Only files carrying our marker may be overwritten.
        with open(fname) as existing:
            autogenerated = AUTOGEN_MSG in existing.read()
        if not autogenerated:
            print("Refusing to overwrite existing non-autogenerated file '%s'." % os.path.abspath(fname))
            print("Please delete or move this .py file manually.")
            sys.exit(1)

    # TODO:
    # I am commenting this "intelligence" out, since, e.g., if I change
    # the preparser between versions this can cause problems.  This
    # is an optimization that definitely isn't needed at present, since
    # preparsing is so fast.
    # Idea: I could introduce version numbers, though....
    #if os.path.exists(fname) and os.path.getmtime(fname) >= os.path.getmtime(f):
    #    return

    # Finally read the file (and close it again right away).
    with open(f) as sage_file:
        F = sage_file.read()

    # Check to see if a coding is specified in the .sage file. If it is,
    # then we want to copy it over to the new file and not include it in
    # the preprocessing. If both the first and second line have an
    # encoding declaration, the second line's encoding will get used
    # (in that case only the second line is removed from the text that
    # gets preparsed).
    #
    # As in PEP 263, only comment lines count as encoding declarations,
    # so that ordinary code containing e.g. "coding=..." is left alone.
    lines = F.splitlines()
    coding = ''
    for num, line in enumerate(lines[:2]):
        if re.match(r"[ \t\f]*#.*?coding[:=]\s*([-\w.]+)", line):
            coding = line + '\n'
            F = '\n'.join(lines[:num] + lines[(num+1):])

    # Preparse it.  The import is done here, not at module level, so
    # that the sage library is only loaded when we actually preparse.
    from sage.misc.preparser  import preparse_file
    G = preparse_file(F)

    # Check for load/attach commands.
    G = do_load_and_attach(G, f, files_before)

    # Put the Sage library include along with a autogen message in the file.
    # It is ** critical ** that we put this after the module docstring, since
    # otherwise the module docstring will disappear.
    insert = '%s%s.\nfrom sage.all_cmdline import *   # import sage library\n'%(AUTOGEN_MSG, f)
    i = find_position_right_after_module_docstring(G)
    G = coding + G[:i] + insert + G[i:]

    # Finally, write out the result; the with-block makes sure the data
    # is flushed and the file closed before we return.
    with open(fname, 'w') as py_file:
        py_file.write(G)

def find_position_right_after_module_docstring(G):
    """
    Return first position right after the module docstring of G, if it
    has one.  Otherwise return 0.

    INPUT:
        G -- a string
    OUTPUT:
        an integer -- the index into G so that G[i] is right after
                      the line on which the module docstring of G ends,
                      if G has one.  The index never exceeds len(G).
                      0 is returned if the first statement of G is not
                      a string literal, if G contains nothing but blank
                      and comment lines, or if a triple-quoted docstring
                      is opened but never closed.
    """
    # Keep the line terminators: this way the offsets computed below are
    # exact no matter whether lines end in "\n" or "\r\n", and also when
    # the last line has no terminator at all.
    lines = G.splitlines(True)

    # Skip all empty lines and commented out lines; the first line that
    # is left is the first line of the first statement in the file.
    for i, line in enumerate(lines):
        n = line.strip()
        if n and n[0] != '#':
            break
    else:
        # No module docstring --- entire file is empty or commented out
        return 0

    # Is that first statement a docstring (possibly a raw string)?
    if not (n[0] in ('"', "'") or n[0:2] in ('r"', "r'")):
        # not a docstring
        return 0

    def pos_after_line(k):
        # Total length of lines 0..k, i.e. the offset at which line
        # k+1 starts.
        return sum(len(text) for text in lines[:k+1])

    if n[0] == 'r':
        n = n[1:]  # strip leading r if there is one
    if n[:3] in ("'''", '"""'):
        quotes = n[:3]
        # possibly multiline
        if quotes in n[3:]:
            # opened and closed on the same line
            return pos_after_line(i)
        for j in range(i + 1, len(lines)):
            if quotes in lines[j]:
                return pos_after_line(j)
        # The docstring is never closed, so there is no complete
        # docstring to insert after.
        return 0

    # It must be a single line; so add up the lengths of all lines
    # including this one and return that.
    return pos_after_line(i)



def do_load_and_attach(G, file, files_before):
    """
    Parse a file G and replace load and attach statements with the
    corresponding execfile() statements.

    INPUT:
        G -- a string; a file loaded in from disk
        file -- the name of the file that contains the non-preparsed
                version of G.
        files_before -- list of files seen so far (don't recurse into
                infinite loop)

    OUTPUT:
        string -- result of parsing load/attach statements in G, i.e.
                  modified version of G with execfiles.

    NOTES:
        Each whitespace-separated word after load/attach is taken as
        one file name, after removing commas and quote characters.
        A .sage file is preparsed recursively and replaced by an
        execfile() of the corresponding .py file; a .py file is
        replaced by an execfile() of itself.  Names with any other
        ending, and names already in files_before, produce no output
        line at all.

        Every piece obtained by splitting G at newlines is written
        back followed by a newline, so the result has one more
        trailing newline than G.
    """
    # Collect the output lines in a list and join once at the end;
    # repeated string concatenation is quadratic in the size of G.
    pieces = []
    for t in G.split('\n'):
        z = load_or_attach.match(t)
        if not z:
            # ordinary line: copy it over unchanged
            pieces.append(t + '\n')
            continue

        # Keep the indentation of the original statement so that the
        # generated execfile() calls stay in the same block.
        lws = z.group('lws')
        for w in z.group('files').split():
            name = w.replace(',', '').replace('"', '').replace("'", "")
            if name in files_before:
                print("WARNING: not loading %s (in %s) again since would cause circular loop" % (name, file))
                continue
            if name.endswith('.sage'):
                do_preparse(name, files_before + [file])
                pieces.append(lws + 'execfile("%s.py")\n' % name[:-5])
            elif name.endswith('.py'):
                pieces.append(lws + 'execfile("%s")\n' % name)
    return ''.join(pieces)


# Here we do the actual work: every file named on the command line is
# preparsed in turn, which creates the corresponding output file.
for sage_file in files:
    do_preparse(sage_file)
