#!/usr/bin/python2 #gztar.py - a DEMpy/PullSDTS module - 6 Dec 01 #Update at http://www.3dartist.com/WP/pullsdts/ #Created by Bill Allen """ This module is common to both DEMpy and PullSDTS. It contains functions for extracting from gzip and TAR files, and for writing gzip files from existing files or from a file-like object (FLO), which is supported in the pulldem module, and which can be used as a buffer object to progressively archive a file to gzip without that file pre-existing as a file by itself. ------ Index, functions: gzOpen - extract from .gz files to disk or FLO test - test this module's functions and classes when it is run by itself Index, classes: gzClass - create .gz from disk file or FLO, or actively from FLO as buffer tarClass - scan .tar as disk file or FLO, extract to disk or FLO tarDirClass - hold info on one constituent of a TAR file """ #--- modules & constants --- import os, struct, sys, time, zlib from string import lower thisDir = os.path.split(sys.argv[0])[0] if not thisDir in sys.path: sys.path.append(thisDir) from pulldem import fileLikeObject, fixNonAscii, fixPath, timeAdjust GZBLKSZ, TARBLKSZ = 1024, 512 #block sizes #--- functions --- def gzOpen(gzPath,savePath='',nameOver=0,makeFLO=0): """ gzOpen(path[,savePath[,overwritename[,makeFLO]]]) ------ This function presumes, per the gzip standard, that there will be only one file archived in a .gz/.tar.gz/.tgz file. If there are files in addition to the first, they will likely cause CRC32 and size check failures, but otherwise will be ignored without notice to the user. Use nameOver=1 to extract the original archived file name. Use makeFLO=1 to return a fileLikeObject containing the extracted file instead of extracting a file to disk. Note that the gzOpen function has no direct relationship with gzClass. This is handled as a function rather than as a class because, without random accessability, there's no reason to do more than just open a read-only gzip disk file and extract from it. 
""" if not lower(gzPath[-3:]) in ['.gz','.tgz']: raise IOError,'gzOpen: Not a gzip file' gz = open(gzPath,'rb') if gz.read(2) != '\037\213': raise IOError,'gzOpen: Not a gzip file' if ord(gz.read(1)) != 8: raise IOError,'gzOpen: Unsupported compression method' flag = ord(gz.read(1)) dtu = struct.unpack(' 0: dtu = long(dtu) #allows float input else: if physical: t = os.path.getmtime(fn) if os.name == 'mac' and t > 2082816000.0: t = t - 2082816000.0 dtu = long(t) else: t = time.time() #for file-like obj, this will be the if os.name == 'mac': # time the file write started, t = t - 2082816000.0 # not ended dtu = long(t) gz.write(struct.pack(' -1: self.sdtsAttr = e.name[0:p] #get the transfer group name break #end def __init__ def cleanup(self): if not self.closed: self.close() self.closed = 1 for f in self.killList: if os.path.exists(f): try: os.remove(f) except OSError: #file moved, deleted, or maybe locked pass self.killList = [] def close(self): self.tar.close() #not self.close(), which would be reentrant self.closed = 1 self.cleanup() def deFold(self,n): """ As a simple solution, any folder names found with file names are discarded and all files are dumped into the same destination folder. NOTE: Duplicate file names get "_" characters inserted. 
#NOTE: deFold is a tarClass method (the class statement is not visible
# in this chunk); indent it under the class when the file's layout is
# restored.
def deFold(self,n):
    """
    deFold(name) -> flattened, unique file name
    ------
    As a simple solution, any folder names found with file names are
    discarded and all files are dumped into the same destination
    folder.  Only the forward slash is treated as a separator.
    A name that is empty once its folders are removed (it ends in "/",
    so it is probably an empty file acting as a folder creator) is
    kept whole, slashes included.  None is treated like ''.
    NOTE: Duplicate file names get "_" characters inserted: before the
    first "." if there is one, otherwise at the end.  The name returned
    is also appended to self.fileNames, which is the list that later
    calls check for duplicates.
    """
    n = n or ''                   #tolerate None as well as ''
    base = n[n.rfind('/')+1:]     #rfind gives -1 when there is no slash,
                                  # so this keeps the whole name then
    if base == '':                #nothing after the last slash, so
        base = n                  # fall back to the full original name
    while base in self.fileNames: #duplicate file name found, so
        p = base.find('.')        # append or insert a "_"
        if p == -1:               # underscore for the name
            base = base + '_'
        else:
            base = base[0:p] + '_' + base[p:]
    self.fileNames.append(base)   #add to file name list
    return base
#end def deFold
#NOTE: getHeader, isFLO and isHeader are tarClass methods (the class
# statement is not visible in this chunk); indent them under the class
# when the file's layout is restored.
def getHeader(self,block,timeSort=0):
    """
    getHeader(block[,timeSort]) -> (fileName,fileLen,modTime)
    ------
    Decode one TAR header block.
    fileName is the NUL-terminated name at the start of the block,
    passed through self.deFold() to deal with any folder names.
    fileLen is the octal size field converted to an integer.
    modTime is the octal time field passed through timeAdjust(); with
    timeSort=1 that value is further converted with
    fixDateTuple(time.ctime(...)) into a sortable string.
    The header checksum is verified, but an unreadable or wrong
    checksum is only reported with a DEBUG line on stdout; the header
    is still returned.  A ValueError raised by the size or time field
    is not caught here.
    """
    fileNm = block[0:100]               #name field, NUL padded
    p = fileNm.find('\0')
    if p != -1:                         #no NUL means the name fills the
        fileNm = fileNm[0:p]            # field, so keep all of it
    fileNm = self.deFold(fileNm)        #deal with any folder names
    fileLen = int(block[124:135],8)     #convert from octal
    modTime = timeAdjust(int(block[136:147],8)) #octal time as integer
    if timeSort:                        #octal time as sortable string
        #NOTE(review): fixDateTuple is not among the names imported in
        # the visible part of this file - confirm where it comes from
        modTime = fixDateTuple(time.ctime(modTime))
    #The checksum field is octal digits padded with NUL and/or space,
    # and int() rejects a NUL, so strip the padding before converting
    field = block[148:156].replace('\0',' ').strip()
    try:
        cksum = int(field,8)
    except ValueError:
        cksum = None                    #nothing to compare against
        print('DEBUG gztar TAR checksum ValueError on %s' % fileNm)
    if cksum is not None:
        L = 256 #the checksum's own 8 bytes count as spaces: 8*32 = 256
        for ch in block[0:148] + block[156:512]: #skip those 8 bytes
            L = L + ord(ch)
        if L != cksum:
            print('DEBUG gztar TAR checksum error on %s %s %s'
                  % (fileNm,str(cksum),str(L)))
            #print fixNonAscii(block[146:157],0,1),int(block[148:155],8),L,fileNm
    return fileNm,fileLen,modTime

def isFLO(self):
    """ Return whether self.tar is a fileLikeObject instance. """
    return isinstance(self.tar,fileLikeObject)

def isHeader(self,block):
    """
    isHeader(block) -> 1 or 0
    ------
    Return 1 if the block looks like a TAR header: either the magic
    field at offset 257 reads 'ustar' or 'GNUtar', or the magic field
    is all NULs and the block matches the old TAR header layout.
    Anything else, including a block too short to hold a magic field,
    returns 0.
    """
    magic = block[257:264]
    if not magic:                       #block too short to be a header
        return 0
    if magic[0] != '\0':                #cut the magic at its terminator,
        p = magic.find('\0')            # a NUL or, failing that, a space
        if p == -1:
            p = magic.find(' ')
        if p != -1:
            magic = magic[0:p]
    if magic in ('GNUtar','ustar'):
        return 1
    if (magic == '\0\0\0\0\0\0\0' and block[0] != '\0'
            and block[106:108] == ' \0' and block[114:116] == ' \0'
            and block[122:124] == ' \0'
            and block[TARBLKSZ-1:TARBLKSZ] == '\0'): #old TAR header format
        return 1                        # (slice is safe on a short block)
    return 0
#end isHeader
#NOTE: scan, extract, extractName and extractAll are tarClass methods
# (the class statement is not visible in this chunk); indent them under
# the class when the file's layout is restored.
def scan(self):
    """
    scan()
    ------
    Read self.tar one block at a time and add a tarDirClass entry to
    self.dir for every header followed by all of its data blocks.
    Each entry records its order (1st, 2nd, etc.), the byte position of
    its first data block, and the tuple from self.getHeader().
    Blocks that are not headers are filler and are skipped, as are
    headers with a size of zero.  A file whose data ends prematurely is
    not added, and scanning stops there.
    Afterwards self.dirLenMax holds the longest text length found for
    each field ('order','name','type','time','size'); existing larger
    values are kept.
    Raises IOError if the first block is not a complete TAR header.
    """
    data = self.tar.read(TARBLKSZ)
    if len(data) < TARBLKSZ or not self.isHeader(data):
        raise IOError('tarClass: Not a TAR file, magic = '
                      + fixNonAscii(data[257:264]))
    self.tar.seek(0)             #set to read the first block again
    ptr = 0
    cnt = 0
    while 1:
        data = self.tar.read(TARBLKSZ)
        if not data:             #end of file
            break
        ptr = ptr + TARBLKSZ
        fptr = ptr               #the file's data starts after its header
        if len(data) < TARBLKSZ or not self.isHeader(data):
            continue             #filler or a truncated block, so loop
        h = self.getHeader(data)
        if h[1] == 0:
            continue             #nothing stored for this name, so loop
        c, extra = divmod(h[1],TARBLKSZ) #get count of full blocks
        if extra > 0:            #see if there is one more block
            c = c + 1
        eof = 0
        for i in range(c):       #read past this file's data blocks
            if not self.tar.read(TARBLKSZ):
                eof = 1
                break
            ptr = ptr + TARBLKSZ
        if eof:                  #if premature EOF, don't add the file to
            break                # the directory because it is flawed
        cnt = cnt + 1
        self.dir.append(tarDirClass(cnt,fptr,h))
    self.dirLenMax['order'] = len(str(len(self.dir)))
    for e in self.dir:           #get longest line length for each field
        for key,text in (('name',e.name),('type',e.type),
                         ('time',str(e.time)), #needed for sorting later
                         ('size',str(e.size))):
            if len(text) > self.dirLenMax[key]:
                self.dirLenMax[key] = len(text)
#end def scan

#--- TAR extraction routines ---
# extract() pulls out a single file by its order within the TAR
# extractName() performs a lookup on a given name & then calls extract()
# extractAll() pulls out all the files within a TAR using extract()

def extract(self,order,savePath='',add2kill=0,makeFLO=0):
    """
    extract(order[,savePath[,add2kill[,makeFLO]]])
    ------
    Write the file at self.dir[order] to disk at
    fixPath(savePath,name).  Folder entries (type ' ') are ignored.
    With add2kill=1 the new file's path is added to self.killList;
    otherwise, except on 'mac', its modification time is set from the
    directory entry.
    makeFLO is accepted but not used by this routine; the file is
    always written to disk.
    """
    e = self.dir[order]
    if e.type == ' ':                   #a folder, nothing to write
        return
    self.tar.seek(e.pos)
    b = self.tar.read(e.size)
    fn = fixPath(savePath,e.name)
    f = open(fn,'wb')
    try:
        f.write(b)
    finally:                            #don't leak the handle if the
        f.close()                       # write fails
    if add2kill:
        self.killList.append(fn)
    elif os.name != 'mac':              #don't need time attribute on a
        os.utime(fn,(time.time(),float(e.time))) # temp file

def extractName(self,name,savePath='',add2kill=0,makeFLO=0):
    """
    extractName(name[,savePath[,add2kill[,makeFLO]]]) -> 1 or 0
    ------
    Extract the first entry whose name matches, ignoring case.
    Returns 1 if an entry was found and passed to self.extract(),
    otherwise 0.
    """
    wanted = name.upper()
    for i in range(len(self.dir)):
        if self.dir[i].name.upper() == wanted:
            #pass the entry's actual index rather than its order-1, so
            # the right file is pulled even if self.dir has been sorted
            self.extract(i,savePath,add2kill,makeFLO)
            return 1
    return 0

def extractAll(self,savePath='',add2kill=0,makeFLO=0):
    """ Call self.extract() on every entry in self.dir, in list order. """
    for i in range(len(self.dir)):
        self.extract(i,savePath,add2kill,makeFLO)

"""
#--- debug routines ---
    def reportDir(self): # ** just for debug
        for e in self.dir:
            print e.order,e.name,e.type,e.size,e.time
    def store(self,path): # ** just for debug
        if self.isFLO:
            self.tar.store(path)
"""
#end class tarClass

class tarDirClass:
    """
    tarDirClass(fileorder,filestart,(filename,filelen,filetime))
    ------
    This puts into one entry a file's name, location and length within
    the TAR, and time/date, as well as original order within the TAR
    and its individual file type.
    This is a class rather than a dictionary so that its instances can
    be sorted and resorted for file listings.
    The type is ' ' for a name containing "/" or a size of zero, 'txt'
    for a file named readme (any case), '!' for any other name without
    a ".", and otherwise whatever follows the last "." in the name.
    """
    def __init__(self,cnt,ptr,hdrTup):
        self.order = cnt    #order within TAR file, 1st, 2nd, etc.
        self.pos = ptr      #byte position within TAR file
        self.ptr = 0        #ptr within subfile
        self.name = hdrTup[0]
        self.size = hdrTup[1]
        self.time = hdrTup[2]
        dot = self.name.rfind('.')
        if self.name.find('/') > -1 or self.size == 0:
            self.type = ' '     #it's a folder name, sort to the top
        elif self.name.lower() == 'readme':
            self.type = 'txt'   #it's a text file
        elif dot == -1:
            self.type = '!'     #unknown file type
        else:
            self.type = self.name[dot+1:]
#end class tarDirClass
def test():
    """
    test()
    ------
    Self-test run when this module is executed by itself.  It fills a
    fileLikeObject with sample text, hands it to gzClass to make
    test_gz_py.txt.gz in this module's folder, and then calls gzOpen()
    on that .gz file.  tarClass is not exercised.
    """
    print('\tTesting create new .txt.gz file & extracting it...')
    name = 'test_gz_py.txt'
    gzName = name + '.gz'
    gzPath = fixPath(thisDir,gzName)
    sample = 'This is a test of the gz.py module. '*50 + '\n'
    buf = fileLikeObject(name)
    buf.open('w')
    buf.write(sample)
    buf.close()
    archive = gzClass(gzPath[:-3],'',0,buf)
    gzOpen(gzPath,'')
    print('\t\tDone - %s extracted from %s' % (name,gzName))
    """
    fn = 'C:\\Python\\DemTest\\o37110f5.d10.tar.gz'
    tar = tarClass(fn,'C:\\Python\\DEMpy\\tmp')
    print 'Is this an FLO?:',['no','yes'][tar.isFLO()]
    tar.reportDir()
    tar.cleanup()
    print 'Is the TAR closed?:',['no','yes'][tar.closed]
    """
    print('\t(tarClass was not tested)')

if __name__ == '__main__':
    #one print, with the pieces joined by the single spaces that the
    # comma-separated form of the print statement puts between items
    banner = ['\nThis module (gztar.py) contains functions and classes for',
              '\nreading gzip and TAR files, and for writing gzip files.',
              '\n\tMore info at http://www.3dartist.com/WP/pullsdts/',
              '\nTesting may now follow...']
    print(' '.join(banner))
    test()
    """
    for e in tar.dir:
        if e.name.lower().find('iden.ddf') > -1:
            if tf:
                tar.tar.seek(0,0,e) #for FLO seek
            else:
                tar.tar.seek(e.pos+e.ptr,0) #for physical seek
            print tar.tar.read(e.size)
            break
    """
### end module ###