#!/usr/bin/env python

# Copyright (c) 2008, William Stein (with permission)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the Sage Project nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY WILLIAM STEIN ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL William Stein BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Uses the system command 'file' to print out the
# types of all files in a (Sage) source tar file,
# which is given as the single command-line argument.
#
# prints "DONE!!!" when finished
#
# takes about 15 CPU minutes to go through Sage
#
# version 20080118
#
#  The following file types are considered understood
#  so no comment is made by them
#
#    TXT type
#    __init__.py, nodoctest.py files
#    .hg directories
#    *.html files (XML type)
#    *.pdf (PDF type)
#    symbolic links
#    *.png (PNG image data)
#    *.gif (GIF image data)
#    *.tiff (TIFF image data)
#    *.jpg (JPEG image data)
#    *.s (Assembler source)
#
#  Bad files (understood so NOT considered weird)
#
#    MS-DOS executables
#    ._* (AppleDouble encoded Macintosh files)
#    *.class (JAVA)
#    *.so (MACH-O)
#    *.o  (MACH-O)
#    *.dylib (MACH-O)
#    ELF
#


# Report verbosity, compared against by the per-file reports further down:
#   0 - print everything
#   1 - only print weird file types
#   2 - only print "bad" stuff (for these, just the path is printed)
runlevel = 2   # only print "bad" stuff

import os
import shutil
import subprocess
import sys

import sage.misc.misc

# Command-line handling and workspace setup: copy the tar file named on the
# command line into a fresh temporary directory, change into that directory
# and extract the archive there.
if len(sys.argv) != 2:
    print("tar file required")
    sys.exit(1)  # usage error: report failure to the caller

print(os.path.abspath('.'))
name = sys.argv[1]
basename = os.path.split(name)[1]
tempdir = sage.misc.misc.tmp_dir()
# Commands are run from argument lists, without a shell, so that paths
# containing spaces or shell metacharacters are passed through unchanged.
# Their exit status is deliberately not checked: a failed copy is caught by
# the existence check made before the scan starts.
subprocess.call(["cp", name, tempdir])

os.chdir(tempdir)
cmd = "tar xvf " + basename
print(cmd)
subprocess.call(["tar", "xvf", basename])

def cleanup():
    """Remove the temporary working directory and everything below it.

    Reads the module-level ``tempdir``.  The tree is removed without going
    through a shell, so the path is never re-interpreted (quotes, ``$`` and
    spaces are safe).  Like ``rm -rf``, a missing directory or a file that
    cannot be removed is silently ignored.
    """
    shutil.rmtree(tempdir, ignore_errors=True)

# Scan driver.  Starting from the copied tar file, every archive that is
# met is unpacked in place, its contents are classified with file(1), and
# the archive is packed again afterwards.

# Integer markers pushed on the work stack in between file names.  Each one
# schedules a follow-up action that runs once every name pushed above it
# has been processed; the operands listed are popped in that order.
_GO_UP = -1        # chdir("..") out of the directory just scanned
_RETAR = -2        # operands: unpacked name, tar file name
_REGZIP = -3       # operand: name of the gunzipped file
_REBZIP = -4       # operand: name of the bunzipped file
_RENAME_BACK = -5  # operands: temporary name, original name

# Reporting policies, interpreted by _report() against ``runlevel``.
_UNDERSTOOD = "understood"  # printed only at runlevel 0
_WEIRD = "weird"            # printed at runlevel 0 and 1
_BAD = "bad"                # labelled at runlevel 0, bare path at runlevel 2


def _report(fullname, label, policy):
    """Print the line for one classified file, if ``runlevel`` asks for it."""
    if policy == _BAD:
        if runlevel == 0:
            print(fullname + ": " + label)
        if runlevel == 2:
            print(fullname)
    elif policy == _WEIRD:
        if runlevel < 2:
            print(fullname + ": " + label)
    else:
        if runlevel == 0:
            print(fullname + ": " + label)


def _report_unknown(fullname, name, typeinfo):
    """Print an unrecognised file together with what file(1) said about it."""
    print(fullname + ": UNKNOWN")
    print(name + typeinfo)


def _abort(message):
    """Print ``message``, announce the abnormal exit and stop the script."""
    print(message)
    print("abnormal exit")
    sys.exit(1)


def _file_type(name):
    """Return file(1)'s description of ``name`` as ``': <description>'``.

    ``file -b`` leaves the file name out of its output, so the patterns
    below can never match text that merely occurs in the name.  The leading
    ``': '`` is put back so that patterns anchored with it still work.  The
    output is read from a pipe rather than from a shared temporary file.
    """
    proc = subprocess.Popen(["file", "-b", name], stdout=subprocess.PIPE,
                            universal_newlines=True)
    return ": " + proc.communicate()[0]


def _ends(*suffixes):
    """Return a test that is true for names ending in one of ``suffixes``."""
    return lambda filename: filename.endswith(suffixes)


def _named(*names):
    """Return a test that is true for names equal to one of ``names``."""
    return lambda filename: filename in names


def _starts(prefix):
    """Return a test that is true for names starting with ``prefix``."""
    return lambda filename: filename.startswith(prefix)


def _plain(description):
    """Rule for a weird type whose description follows the name directly."""
    return (': ' + description, description, _WEIRD, None)


def _anywhere(description):
    """Rule for a weird type matched anywhere in file(1)'s description."""
    return (description, description, _WEIRD, None)


# Ordered classification rules: (pattern, label, policy, expected_name).
# The first rule whose pattern occurs in the file(1) description wins, so
# the order is significant.  When ``expected_name`` is given, ``policy``
# only applies to names passing that test; any other name is "weird".
_RULES = [
    ('text', 'TXT', _UNDERSTOOD, None),
    _anywhere('TeX DVI'),
    (': XML', 'XML', _UNDERSTOOD, _ends('.html')),
    (': Zip archive data', 'ZIP', _WEIRD, None),
    ('Java', 'JAVA', _BAD, _ends('.class')),
    (': data', 'DATA', _WEIRD, None),
    (': very short file (no magic)', 'very short file (no magic)',
     _UNDERSTOOD, _named('__init__.py')),
    _plain('DCL command file'),
    _plain('CLIPPER instruction trace'),
    _plain('Palm OS dynamic library data'),
    _plain('80386 COFF'),
    _plain('ACB archive data'),
    _plain('MS Windows HtmlHelp Data'),
    (': AppleDouble encoded Macintosh file',
     'AppleDouble encoded Macintosh file', _BAD, _starts('._')),
    _plain('Macromedia Flash data'),
    _plain('Microsoft Installer'),
    (': PNG image data', 'PNG image data', _UNDERSTOOD, _ends('.png')),
    (': empty', 'empty', _UNDERSTOOD, _named('__init__.py', 'nodoctest.py')),
    _anywhere('8086 relocatable'),
    _plain('PC bitmap data'),
    (': GIF image data', 'GIF image data', _UNDERSTOOD, _ends('.gif')),
    _plain('Apple binary property list'),
    _plain('LaTeX table of contents'),
    _plain('Makeindex log file'),
    _plain('LaTeX raw index file'),
    _anywhere('LaTeX auxiliary file'),
    ('TIFF image data', 'TIFF image data', _UNDERSTOOD, _ends('.tiff')),
    _anywhere('DOS EPS Binary File'),
    _anywhere('MPEG sequence'),
    ('JPEG image data', 'JPEG image data', _UNDERSTOOD, _ends('.jpg')),
    _plain('Apple Old Partition'),
    _plain('current ar archive'),
    _plain('python 2.3 byte-compiled'),
    _plain('python 2.4 byte-compiled'),
    _plain('python 2.5 byte-compiled'),
    ('Assembler source', 'Assembler source', _UNDERSTOOD, _ends('.s')),
    (': PDF', 'PDF', _UNDERSTOOD, _ends('.pdf')),
    (': MS-DOS executable', 'MS-DOS executable', _BAD, None),
    _plain('DOS executable'),
    _plain('Matlab v5 mat-file'),
    _plain('Extreme Tracker AMS Module v1.3'),
    _plain('JVT NAL sequence'),
    _plain('NeXT/Apple typedstream data'),
    _plain('Rich Text Format data'),
    _plain('AppleSingle encoded Macintosh file'),
    _plain('Adobe Photoshop Image'),
    _plain('Macintosh Application (data)'),
    _plain('X11 SNF font data'),
    _plain('Sun/NeXT audio data'),
    _plain('Berkeley DB'),
    _plain('multipart/mixed'),
    _plain('message/rfc822'),
    _plain('LaTeX sorted index'),
    _plain('Xara graphics file'),
    _plain('PalmOS application'),
    _plain('Par archive data'),
    _plain('PGP key public ring'),
    _plain('GPG key public ring'),
    _plain('PGP key security ring'),
    _plain('FITS image data'),
    _plain('Bio-Rad .PIC Image File'),
    _plain('Arhangel archive data'),
    _plain('RISC OS Draw file data'),
    _plain('CLIPPER instruction profile'),
    _plain('character Computer Graphics Metafile'),
    _plain('Windows INF file'),
    _plain('Emacs v18 byte-compiled Lisp data'),
    _plain('Netpbm PPM'),
    _plain('TrueType font data'),
    _plain('ASCII font metrics'),
    _plain('Maple worksheet'),
    _plain('GNU message catalog'),
    (': ELF 32-bit LSB executable, Intel 80386',
     'ELF 32-bit LSB executable', _BAD, None),
    (': symbolic link', 'symbolic link', _UNDERSTOOD, None),
    _anywhere('SysEx File'),
    _plain('libtool library file'),
    _plain('SQLite 3.x database'),
    _plain('YAC archive data'),
    _plain('PGP armored data'),
    _plain('Apple QuickTime movie'),
    _plain('Quake I or II world'),
    (': Mach-O', 'Mach-O', _BAD, _ends('.so', '.o', '.dylib')),
    (': ELF 64-bit', 'ELF 64-bit', _BAD, None),
    (': ELF 32-bit', 'ELF 32-bit', _BAD, None),
    _plain('SPEC'),
    _plain('Octave binary data'),
    _plain('DBase 3 data file'),
    _plain('OpenOffice.org'),
    _plain('fifo (named pipe)'),
    _plain('broken symbolic link'),
    _plain('timezone data'),
    _plain('Macintosh Library (data)'),
    _plain('SPSS System File'),
    _plain('mc68020 pure executable not stripped'),
    _plain('MPEG ADTS'),
    _plain('XPack DiskImage archive data'),
    _plain('VMS Alpha executable'),
]


def _swap_name(name, newname, fullname, stack):
    """Rename ``name`` to ``newname`` for scanning and schedule the undo."""
    os.rename(name, newname)
    if runlevel == 0:
        print(fullname + " -> " + newname)
    # Popped in reverse: scan newname, then rename newname back to name.
    stack.extend([name, newname, _RENAME_BACK, newname])


def _scan_entry(name, stack):
    """Classify ``name`` (relative to the current directory).

    Directories and archives are opened up: their contents, together with
    the markers that later restore the original state, are pushed on
    ``stack``.  Every other file is reported according to ``_RULES``.
    """
    fullname = os.getcwd() + "/" + name
    typeinfo = _file_type(name)

    if ': directory' in typeinfo:
        # ".hg" is version-control data; "g95" is the directory found under
        # spkg/standard/fortran-20071120.p3/src/.  Neither is descended into.
        if name in (".hg", "g95"):
            if runlevel == 0:
                print(fullname + ": IGNORING")
        else:
            if runlevel == 0:
                print(fullname + ": DIR")
            stack.append(_GO_UP)
            sys.stdout.flush()
            stack.extend(os.listdir(name))
            os.chdir(name)

    elif name.endswith('.spkg'):
        # Give the package the suffix matching its real format so that the
        # bzip2 / tar handling below picks it up.
        stem = name[:-len('.spkg')]
        if 'bzip2' in typeinfo:
            _swap_name(name, stem + '.tar.bz2', fullname, stack)
        elif 'tar' in typeinfo:
            _swap_name(name, stem + '.tar', fullname, stack)
        else:
            # Neither format: there is no sensible new name, so leave the
            # file alone and flag it instead of renaming it blindly.
            _report_unknown(fullname, name, typeinfo)

    elif ': bzip2' in typeinfo:
        if not name.endswith('.bz2'):
            # bunzip2 would pick an output name this script cannot predict;
            # handled the same way as gzip data with an unexpected suffix.
            _abort(fullname + ": BZIP2 with unusual suffix")
        subprocess.call(["bunzip2", name])
        stem = name[:-len('.bz2')]
        if runlevel == 0:
            print(fullname + " -> " + stem)
        stack.extend([stem, _REBZIP, stem])

    elif ': gzip ' in typeinfo:
        if name.endswith(".gz"):
            # special case - BAD!!!
            # 'spkg/standard/tachyon-0.98beta.p3/src/docs/tachyon.html.tar.gz'
            # overwrites dir 'tachyon'
            if name == "tachyon.html.tar.gz":
                if runlevel <= 1:
                    print(fullname + ": IGNORING")
            else:
                subprocess.call(["gunzip", name])
                stem = name[:-len('.gz')]
                stack.extend([stem, _REGZIP, stem])
                if runlevel == 0:
                    print(fullname + " -> " + stem)
        elif name.endswith(".tgz"):
            _swap_name(name, name[:-len('.tgz')] + ".tar.gz", fullname, stack)
        elif name.endswith(".dia"):  # See http://live.gnome.org/Dia
            _swap_name(name, name[:-len('.dia')] + ".gz", fullname, stack)
        elif name.endswith(".rda"):
            if runlevel <= 1:
                print(fullname + ": R data format? (find error)")
        else:
            _abort(fullname + ": GZIP with unusual suffix")

    elif ': POSIX tar archive' in typeinfo:
        # special cases BAD!!!
        # 'spkg/standard/python-2.5.1.p10/src/Lib/test/testtar.tar'
        #     untars with error
        # 'spkg/standard/jmol-11.5.2/jmol/jars/vecmath1.2-1.14.tar'
        #     first line of the listing does NOT tell the directory name
        if name in ("testtar.tar", "vecmath1.2-1.14.tar"):
            if runlevel <= 1:
                print(fullname + ": IGNORING")
            return
        # The first member of the listing is taken to be the directory the
        # archive unpacks to.  "tar tf" prints bare member names, so names
        # containing spaces survive.
        proc = subprocess.Popen(["tar", "tf", name], stdout=subprocess.PIPE,
                                universal_newlines=True)
        members = proc.communicate()[0].splitlines()
        if not members:
            # Nothing to unpack or scan; keep the archive as it is.
            if runlevel < 2:
                print(fullname + ": POSIX tar archive (empty)")
            return
        newname = members[0].rstrip('/')
        subprocess.call(["tar", "xf", name])
        # remove tar file; it is recreated by the _RETAR action
        os.unlink(os.getcwd() + '/' + name)
        if runlevel == 0:
            print(fullname + " -> " + newname)
        # Popped in reverse: scan newname, then tar newname up as name.
        stack.extend([name, newname, _RETAR, newname])

    else:
        for pattern, label, policy, expected_name in _RULES:
            if pattern in typeinfo:
                if expected_name is not None and not expected_name(name):
                    policy = _WEIRD
                _report(fullname, label, policy)
                break
        else:
            _report_unknown(fullname, name, typeinfo)


name = basename

stack = [name]
print(os.getcwd() + "/" + name)
# Two spaces after "=" reproduce the output of: print "runlevel= ", runlevel
print("runlevel=  " + str(runlevel))
if not (os.path.isfile(name) or os.path.isdir(name)):
    print("does not exist!!!")
    cleanup()
    sys.exit(1)

try:
    while stack:
        name = stack.pop()
        if name == _GO_UP:
            os.chdir("..")
        elif name == _RETAR:
            unpacked = stack.pop()
            tarname = stack.pop()
            subprocess.call(["tar", "cf", tarname, unpacked])
            if runlevel == 0:
                print(unpacked + " -> " + tarname)
            # remove the unpacked copy now that it is archived again
            subprocess.call(["rm", "-rf", unpacked])
        elif name == _REGZIP:
            name = stack.pop()
            if runlevel == 0:
                print(name + " -> " + name + ".gz")
            subprocess.call(["gzip", name])
        elif name == _REBZIP:
            name = stack.pop()
            if runlevel == 0:
                print(name + " -> " + name + ".bz2")
            subprocess.call(["bzip2", name])
        elif name == _RENAME_BACK:
            newname = stack.pop()
            name = stack.pop()
            os.rename(newname, name)
            if runlevel == 0:
                print(newname + " -> " + name)
        else:
            _scan_entry(name, stack)
finally:
    # Runs on normal completion, on an abnormal exit and on any unexpected
    # error, so the temporary copy never outlives the script.
    cleanup()
print("DONE!!!")
