#!/usr/bin/python
# -*- coding: utf-8 -*-
#tardump.py - tarDump 1.1 - 2 Mar 01
#A basic cross-platform script to inspect SDTS .tar files.
#Update at http://www.3dartist.com/WP/python/pycode.htm#tardump
#Created by Bill Allen
############################################################################
##########> USER-ASSIGNED VARIABLES <####> put names between '' <###########
#
inFile = ''
inDir = ''
outFile = ''
outDir = ''
#
### configuration ###
#
eolMark = '|'        #end of line marker
lineLen = 50         #output line length (50 is good, 0=none)
noJunk = 1           #set to 0 to see garbage (confusing)
longCntMax = 3       #limit data blocks to show from long files
#                    # set to 0 to show all (hundreds)
### character substitutions ###
#
sub000 = chr(183)    #'·'=dot for ASCII zero
sub010 = chr(172)    #'¬'=for line feed ASCII 10
sub013 = chr(171)    #'«'=for carriage return ASCII 13
sub030 = chr(176)    #'°'=degree/superscr zero for ASCII 30
sub031 = chr(185)    #'¹'=superscript one for ASCII 31
subElse = chr(191)   #'¿'=upside-down question for non-ASCII
subJunk = chr(184)   #'¸'=garbage character used for padding
#
#---------- examples -------------------------------------------------------
#inFile = '30_2_1_1009294.tar.gz' #.tar OR .tar.gz file to dump to text
#inDir = 'c:/Python/demtest/'     #folder where it is located
#outFile = ''                     #output file name - OVERwrites older
#outDir = 'c:/temp/dems/'         #output folder, blank ('') if same as in
############################################################################
##########> change nothing below here <#####################################
#HOW TO USE:
#This program dumps a wordprocessor-safe text file from a USGS SDTS
#   .tar.gz or .tar file. It accepts _tar.gz but not .tgz names.
#Place this script anywhere you like on a machine with Python installed.
#TarDump is not a self-executing program but rather a Python script.
#   You don't have to be a programmer to use it, but you must have
#   the Python 2.x language interpreter installed on your system.
#   Python is a free download from http://www.python.org for many
#   different platforms (Mac, Win95+, etc.), and comes with Linux
#   (Linux users may need to upgrade to 2.x).
#With no user interface or command line inputs, this script requires
#   that you type in the starting folder and .tar.gz file names (output
#   file and folder names are optional). Do that above, in the area
#   marked "user-assigned variables." Use forward slashes in folders.
#You also may change the configuration settings and non-ASCII
#   character substitutions, but try the defaults first. (Change
#   nothing else, unless you intend to rewrite the script.)
#When you have named the file and folder(s)--they must exist and be
#   properly stated, then SAVE and RUN the script.
#When done, you should find the output text file in the original or,
#   if designated, the destination folder. The original .tar.gz or
#   .tar file is left untouched.
#PLATFORM: This code should run on any machine that has Python
#   2.x installed, but has been tested only with 2.0 on Windows ME.
#   The block parsing compares byte strings read from the file with
#   plain string literals, i.e. it relies on Python 2 string semantics.
#HISTORY:
#2001-03-02: v1.1 - Now handles original Unix TAR files and blocks
#   of all nulls, and can skip over blocks from any long files.
#2001-02-17: v1.0 & 1.0a - First posted and then revised slightly.
#NOTES:
#See http://www.3dartist.com/WP/sdts/sdtsnotes.htm#dumps
#   for more about tarDump and uses for it.
#To learn more about gzip and TAR files, visit
#   http://www.3dartist.com/WP/python/pynotes.htm#gztar
#LICENSE: This code is free software provided as is and without
#   warranty for fitness for any purpose. Use it at your own risk.
#   This code and derivations may be used anywhere, including for
#   commercial application. If used publicly, acknowledgment
#   and a link back would be appreciated.

### BEGIN PROGRAM ###

#--- local "global" constants - don't touch! ---
NOT_TARHEAD = 'not a TAR file header block'
TARBLK_SIZE = 512                     #TAR files come in 512-byte blocks
avoidBig = longCntMax * TARBLK_SIZE   #skip showing past this much file data
magicList = ['ustar']                 #list of TAR types used for SDTS

#--- modules & functions ---
import gzip
import time


def fixByte(c,p,pMax):
    """Return the printable stand-in for one byte of a TAR block.

    c    -- the one-character string to translate
    p    -- position of c within its block (0-based)
    pMax -- count of meaningful bytes in the block; positions at or past
            it are padding

    Padding is shown as subJunk while noJunk is set.  Printable ASCII
    (32..126) is returned unchanged, the control codes 0, 10, 13, 30 and
    31 get their configured substitutes, and everything else becomes
    subElse.
    """
    if p >= pMax and noJunk:
        return subJunk
    o = ord(c)
    if 32 <= o <= 126:
        return c
    elif o == 30:
        return sub030
    elif o == 31:
        return sub031
    elif o == 0:
        return sub000
    elif o == 10:
        return sub010
    elif o == 13:
        #carriage return has its own glyph; it must not share the one
        #used for the ASCII 30 unit terminator
        return sub013
    else:
        return subElse
#end def fixByte


def fixBuffer(buff,useLen):
    """Return one TAR block rendered as word-processor-safe text.

    buff   -- the block; its first TARBLK_SIZE characters are rendered
    useLen -- count of meaningful bytes in the block (may exceed the
              block size, or be below 1 for a block holding no data)

    A block with no meaningful bytes yields the '( EMPTY BLOCK )' note
    while noJunk is set.  Otherwise every byte goes through fixByte and,
    when lineLen > 0, eolMark is inserted after every lineLen characters
    and once more at the end; with lineLen == 0 the text ends in '\n'.
    """
    if useLen < 1 and noJunk:
        return '\t( EMPTY BLOCK )\n'
    parts = []                  #joined once at the end, not grown by '+'
    cnt = 0
    for i in range(TARBLK_SIZE):
        parts.append(fixByte(buff[i],i,useLen))
        cnt = cnt + 1
        if cnt == lineLen:      #cnt >= 1 here, so this implies lineLen > 0
            cnt = 0
            parts.append(eolMark)
    if lineLen > 0:
        parts.append(eolMark)
    else:
        parts.append('\n')
    return ''.join(parts)
#end def fixBuffer


def _writeLegend(g):
    """Write the input file name and the substitution legend to g."""
    g.write(inFile+'\nLEGEND:\ncode = ASCII value\n' \
        +sub000+' = 0 null\n'+sub010+' = 10 line feed\n' \
        +sub013+' = 13 carriage return\n' \
        +sub030+' = 30 DDF unit terminator\n' \
        +sub031+' = 31 DDF field terminator\n' \
        +subJunk+' = junk characters\n'+subElse+' = binary data\n')
#end def _writeLegend


def _dumpBlocks(f,g):
    """Read TAR blocks from open file f and write their text form to g."""
    blockCnt = 0
    fileLen = 0     #data bytes still expected for the current member
    longFlag = 0    #set while inside a member long enough to be cut short
    longCnt = 0     #data blocks seen so far for that long member
    while 1:
        b = f.read(TARBLK_SIZE)
        if not b:   #end of file
            break
        blockCnt = blockCnt + 1
        if fileLen < 1:     #this is a header block
            if blockCnt == 1:
                #1st header must have magic word or be TAR-like
                magic = b[257:262]
                if not magic in magicList:
                    if not (magic == '\0\0\0\0\0'
                            and b[106:108] == ' \0'
                            and b[114:116] == ' \0'
                            and b[122:124] == ' \0'
                            and b[TARBLK_SIZE-1] == '\0'):
                        print('\tERROR: Oops, not a known TAR file type: [' \
                            + magic + ']')
                        break
            if b[0] == '\0':    #empty blocks can be all nulls
                g.write('\n>> block '+str(blockCnt)+' '
                    +'\t( EMPTY BLOCK )\n')
                continue        #note & loop
            #name is in 1st 99 bytes, ended by a NUL when shorter; split
            #keeps the whole field when no NUL is present
            fileNm = b[0:99].split('\0')[0]
            fileLen = int(b[124:135],8)
            if longCntMax > 0:  #skip over blocks from long files
                if longFlag:    #done skipping blocks
                    #blocks 1..longCntMax were shown, the rest were not
                    skipped = longCnt - longCntMax
                    if skipped > 0:
                        g.write('\n\t('+str(skipped) \
                            +' data blocks skipped)\n')
                    longFlag = 0
                    longCnt = 0
                if fileLen >= avoidBig:     #skip over long files
                    longFlag = 1
                    longCnt = 0
            modTime = time.ctime(int(b[136:147],8))
            g.write('\n>> block '+str(blockCnt)+' '+fileNm+' ' \
                +str(fileLen)+' bytes '+modTime+'\n')
            g.write(fixBuffer(b,TARBLK_SIZE))
        else:               #this is a data block
            if longFlag:
                longCnt = longCnt + 1
                if longCnt > longCntMax:
                    fileLen = fileLen - TARBLK_SIZE
                    continue    #loop until new header appears
            g.write('\n>> block '+str(blockCnt)+'\n')
            g.write(fixBuffer(b,fileLen))
            fileLen = fileLen - TARBLK_SIZE
#end def _dumpBlocks


def dumpTar():
    """Dump the TAR file inDir+inFile as text into outDir+outFile.

    Uses the module-level names and configuration; an existing output
    file is overwritten.  Both files are closed even when reading or
    parsing fails part-way through.
    """
    f = open(inDir+inFile,'rb')
    try:
        g = open(outDir+outFile,'w')
        try:
            _writeLegend(g)
            #one parenthesized string prints the same text under the
            #Python 2 print statement and parses on newer interpreters
            print('Dumping '+inDir+inFile+' \n\tto '+outDir+outFile)
            _dumpBlocks(f,g)
        finally:
            g.close()
    finally:
        f.close()
    print('\tAll done.')
#end def dumpTar function


def openGZ(gzDir,gzFile,dest):
    """Decompress gzDir+gzFile into folder dest.

    The output keeps the input name minus its last three characters
    (the '.gz' extension).  Both files are closed even when
    decompression fails.
    """
    print('\nExtracting TAR\n\tfrom '+gzDir+gzFile+' \n\tto '+dest)
    f = gzip.open(gzDir+gzFile)     #open file, defaults to 'rb'
    try:
        g = open(dest+gzFile[:-3],'wb') #new file without .gz extension
        try:
            while 1:
                chunk = f.read(1024)    #read a chunk of the .gz file
                if not chunk:           #end of file
                    break
                g.write(chunk)          #write a chunk of the .tar file
        finally:
            g.close()
    finally:
        f.close()
    print('\tGzip file done.')
#end def openGZ function


#--- go to work ---
if outDir == '':        #if no outDir, use inDir for save
    outDir = inDir
if outFile == '':       #if no outFile name, create one
    x = inFile.find('.')    # find the 1st period
    if x < 4:               # safety
        outFile = 'tardump.txt'
    else:                   # grab the part of a USGS file name
        outFile = inFile[x-4:x] + 'dump.txt'    # most likely to be unique
eolMark = eolMark + '\n'    #this saves 1000s of steps later
if inFile[-3:].upper() == '.GZ':    #if it's a tar.gz, then get the TAR
    openGZ(inDir,inFile,outDir)     # & make the inFile be that TAR
    inDir = outDir
    inFile = inFile[0:-3]
#the length test keeps inFile[-4] from raising on a 3-character name
if len(inFile) > 3 and inFile[-3:].upper() == 'TAR' \
        and '_.'.find(inFile[-4]) > -1:
    dumpTar()
else:
    print('ERROR: Input must be TAR file.')