#!/usr/bin/python2
#pds.py - a Bippy module - 17 August 2004 - 0.0.4
#an original script written by Bill Allen for Python 2+
#this script has no price, limitations, or warranty - use at your own risk
"""
This module handles image files in the NASA Planetary Data System PDS format
used for archiving data and images from NASA planetary missions.
  http://pds.jpl.nasa.gov/aboutpds.html
  http://pds.jpl.nasa.gov/documents/ - file format documentation

PDS is a very flexible tagged format similar to FITS and VICAR that uses an
object description language (ODL).  Like those formats, PDS is not very well
supported by commonly available graphics software.

Notes:
  The PDS format is also used to store data other than images.
  PDS files may use the .pds file name extension, but PDS image files often
  use .img, and other extensions are used such as .tab (data) and .lbl
  (label description).

function index
--------------
parsePDS       reads a PDS image file and returns a raster object
pdsHeaderVals  builds the dictionary of header values used by parsePDS

There is room for a number of improvements in the parsePDS function that
will be added when suitable PDS test files can be found:
  --converting signed image data to unsigned
  --handling RGB interleaved, multi-pane, and/or multi-file channels
  --handling floating point image data
  --any PDS image file that this module fails to handle
  --shape model & digital elevation PDS files (e.g., asteroid shapes)
If you find such files, please describe them to the script author
  SUBJECT: Python
but do _NOT_ send file attachments without first checking.

Some other features could be implemented from files already available
for testing, but appear not to be necessary:
  --handling "window objects," image areas within raster data
    (this is seen with star tracker images)
  --handling histograms
  --applying 12-bit masking to 16-bit data stored as two-byte data

history
0.0.1 - first version 6 April 2004
0.0.2 - 15 April 2004: made keephead do something
0.0.3 - 16 Aug. 2004:
      - added imageDataType ['16','MSB_INTEGER'], thanks to Daniel Crotty
      - implemented header values dictionary with parsePDSvals function
0.0.4 - 17 Aug. 2004: added ability to obtain only header information
"""

#--- modules & constants ---
import array, os, sys
from string import join, split, strip

thisDir = os.path.split(sys.argv[0])[0]
if not thisDir in sys.path: sys.path.append(thisDir)
import raster
from common import fixPath

#--- functions ---
def pdsHeaderVals(list,vals):
    """
    Build a dictionary of header values from the text lines of a PDS label.

    arguments
      list - sequence of header lines (strings); leading and trailing white
             space is ignored.  (The name shadows the builtin, but it is kept
             because it is part of the calling interface.)
      vals - list of requested keys, or ['all'] for every key.  Note that
             vals must be a list.  Any of [], [None], [''] or ['none']
             requests nothing.

    returns
      {} when nothing is requested, otherwise a dictionary holding
        'objects' - names of the OBJECTs declared in the header, in order
        'keylist' - the returned keys in their original header order
        key       - the value text for each returned key

    Where objects are declared in the header, their values are keyed as
    objectName:key, so to get the MEAN for the IMAGE object, call with
    vals=['IMAGE:MEAN'], and then the image mean will equal
    float(imgObj.values['IMAGE:MEAN']).

    notes
      --Reading stops at the END line.
      --A line without "=" is treated as run-on text of the previous key.
      --Only the first "=" on a line separates key from value, so values
        that contain "=" are kept whole.
      --Run-on text that itself contains "=" is still read as a new key;
        quoted multi-line values are not tracked.
    """
    if vals in [[],[None],[''],['none']]:
        return {}                           #nothing requested
    found = {'objects':[],'keylist':[]}     #keylist = original order of keys
    obj = ''                                #name of the OBJECT being read
    key = ''                                #last key stored, for run-on text
    for s in list:
        s = s.strip()
        if not s: continue                  #blank line, nothing to index
        if s[:2] == '/*': continue          #a comment, even if it holds "="
        if s.find('=') == -1:               #not a line of key = data
            if s[0] == '/': continue        #it's a comment
            if s == 'END': break            #it's the end
            if key:
                current = found[key]
                #an empty value ("KEY =" with the text on the next line) has
                #no last character to test, so the run-on text becomes the
                #value as it is
                if not current or current[-1] in ['"',' ']:
                    found[key] = current + s        #concatenate run-on string
                else:
                    found[key] = current + ' ' + s
                continue
            print('pds:pdsHeaderVals error: don\'t know what to do with')
            print('\t' + s)
            continue
        t = s.split('=',1)                  #split on the first "=" only
        name = t[0].strip()
        value = t[1].strip()
        if name == 'OBJECT':                #begin object values
            obj = value
            key = ''
            found['objects'].append(obj)
            continue
        if name == 'END_OBJECT':            #end object values
            obj = ''
            key = ''
            continue
        if obj: key = obj + ':' + name
        else: key = name
        found['keylist'].append(key)
        found[key] = value
    #copy out everything, or only the requested keys, in header order
    retval = {'objects':found['objects'],'keylist':[]}
    for key in found['keylist']:
        if vals == ['all'] or key in vals:
            retval['keylist'].append(key)
            retval[key] = found[key]
    return retval
#end def pdsHeaderVals
def parsePDS(pn,fn='',keephead=0,showpad=0,verbose=0,vals=[],headonly=0):
    """
    Read a PDS image file.

    If successful, this function returns (imgObj,''), where imgObj is a
    raster:rasterObjectClass object (or a raster:rasterDummyClass object
    when only the header is requested).  On failure it returns
    (None,message).  Failure to open the file is not caught here.

    Note that two values are added to the returned object, empty if not
    requested:
      imgObj.header = full text of PDS header, if keephead=1
      imgObj.values = dictionary of header values, if not vals=[]

    arguments - only pn is required
      pn       - path name, can include file name if fn is empty
      fn       - file name to be combined with pn
      keephead - Boolean re storing the PDS header intact in the rasObj
      showpad  - Boolean re keeping an image's padded area (if there is any
                 left-right padded area, usually undesired)
      verbose  - print reports such as of PDS header lines found without "="
      vals     - list of no/all/one or more header values to be returned
                 if found
                 =[] to return nothing
                 =['all'] to return everything
                 =['TARGET_NAME'] for a general TARGET_NAME value
                 =['IMAGE:MEAN'] for the MEAN value of the IMAGE object
      headonly - Boolean to return a dummy object with just the requested
                 header info, but NOTE that keephead has to be =1 or vals
                 has to be !=[], otherwise the image is read as usual

    notes:
      --Use showpad=1 to suppress prefix/suffix instruction in order to
        inspect the entire image area (may hold clues to image problems or
        maybe data).
      --LINE_PREFIX_BYTES and LINE_SUFFIX_BYTES are byte counts, so they are
        removed from the raw record before it is turned into samples.
      --16-bit MSB_INTEGER (signed) data is read as unsigned words.
      --The histogram, if present, is noted but not made available to the
        caller.
      --Window objects (areas within an image) are not yet supported.
      --Bit masking is not yet implemented.  (Is there a need for this?)
    """
    errPfx = 'bippy:pds:parsePDS: '
    if fn: pn = fixPath(pn,fn)      #can pass path & file names separately
    else: pn = fixPath(pn)          # or one name for fixPath to normalize
    if vals is None: vals = []      #tolerate None; vals is only ever read

    endFound = 0                    #triggers switch from header to data
    pdsVersion = ''                 #PDS version
    histoPtr = None                 #location of histogram data (noted only)
    imagePtr = None                 #location of image data first record
    imageArrayType = None           #Python array data type
    imageDataType = ['','']         #PDS data type
    imageHeaderFlag = 0             #flags reading image header details
    pfxCount = 0                    #leading empty bytes per record
    ptr = 0                         #file position just past the END line
    recBytes = None                 #data record length in bytes
    recCount = None                 #data records count (noted only)
    rowCount = None                 #image lines (rows)
    colCount = None                 #image samples per line (noted only)
    sfxCount = 0                    #trailing empty bytes per record
    header = ''                     #raw header text, kept if keephead
    headerLines = []                #header lines, kept if vals requested

    f = open(pn,'rb')               #must be 'rb', not 'r'!
    try:                            #the file is closed on every way out
        #--- read the header ---
        while 1:
            s = f.readline()                #get a line
            if not s: break                 #end of file
            s = s.strip()                   #strip white space & CR/LF
            if not s: continue              #blank, so loop
            if vals != []: headerLines.append(s)
            t = s.split()
            if not pdsVersion:      #must first verify this is a PDS file
                if len(t) >= 3 and t[0] == 'PDS_VERSION_ID' and t[1] == '=':
                    pdsVersion = t[2]
                    continue
                return None,'not a Planetary Data System PDS file'
            if len(t) < 3:
                if t[0] == 'END':
                    endFound = 1
                    ptr = f.tell()  #always needed to locate the image data
                    if keephead:
                        f.seek(0)
                        header = f.read(ptr)
                    break
                if verbose: print(' NO "=" in: ' + s)
                continue
            if t[1] != '=': continue        #run-on text, not key = value
            try:
                if t[0] == 'OBJECT':
                    if t[2] == 'IMAGE': imageHeaderFlag = 1
                elif t[0] == 'END_OBJECT':
                    if t[2] == 'IMAGE' and imageHeaderFlag:
                        imageHeaderFlag = 0
                elif t[0] == '^IMAGE':
                    imagePtr = int(t[2])
                elif t[0] == '^IMAGE_HISTOGRAM':
                    histoPtr = int(t[2])
                elif t[0] == 'FILE_RECORDS':
                    recCount = int(t[2])
                elif t[0] == 'RECORD_BYTES':
                    recBytes = int(t[2])
                elif imageHeaderFlag:       #values inside the IMAGE object
                    if t[0] == 'LINE_PREFIX_BYTES':
                        pfxCount = int(t[2])
                    elif t[0] == 'LINE_SUFFIX_BYTES':
                        sfxCount = int(t[2])
                    elif t[0] == 'LINES':
                        rowCount = int(t[2])
                    elif t[0] == 'LINE_SAMPLES':
                        colCount = int(t[2])
                    elif t[0] == 'SAMPLE_BITS':
                        imageDataType[0] = t[2]
                    elif t[0] == 'SAMPLE_TYPE':
                        imageDataType[1] = t[2]
            except ValueError:
                return None,'bad value in PDS header line: ' + s
        if not endFound:
            return None,'PDS header END not found'

        hList = ['>'+errPfx[:-2],'>PDS version: '+pdsVersion,'>path '+pn]

        #--- header only, if requested ---
        if headonly and (keephead or vals != []):
            imgObj = raster.rasterDummyClass(history=hList,
                                             caller=errPfx[:-2])
            imgObj.header = header
            if vals == []: imgObj.values = {}
            else: imgObj.values = pdsHeaderVals(headerLines,vals)
            return imgObj,''

        #--- read the data ---
        if showpad:                 #override padding removal
            pfxCount = 0
            sfxCount = 0
        if imageDataType == ['8','MSB_UNSIGNED_INTEGER']:
            imageArrayType = 'B'
        elif imageDataType == ['16','MSB_INTEGER']:
            imageArrayType = 'H'
        elif imageDataType == ['16','MSB_UNSIGNED_INTEGER']:
            imageArrayType = 'H'
        else:
            return None,'-bit '.join(imageDataType)+' not implemented'
        if recBytes is None or recBytes <= 0:
            return None,'PDS header RECORD_BYTES not found'
        if rowCount is None:
            return None,'PDS header IMAGE LINES not found'

        if imagePtr:
            f.seek((imagePtr-1)*recBytes)   #PDS has first record = 1
            note = ' start from imagePtr, WxH = ' + str(recBytes) + 'x' \
                + str(rowCount)
        else:
            #data starts on the first record boundary at or after the END
            #line, so round up rather than always skipping one more record
            start = (ptr + recBytes - 1) // recBytes
            f.seek(recBytes*start)
            note = ' start from calculated, WxH = ' + str(recBytes) + 'x' \
                + str(rowCount)

        image = []
        itemSize = array.array(imageArrayType).itemsize
        try:
            for y in range(rowCount):
                rec = f.read(recBytes)
                if len(rec) < recBytes:
                    return None,'unexpected end of file in image data'
                rec = rec[pfxCount:recBytes-sfxCount]   #drop padding bytes
                rec = rec[:len(rec) - len(rec) % itemSize]  #whole samples
                image.append(array.array(imageArrayType,rec))
        except IOError:
            err = sys.exc_info()[1]
            return None,'I/O error(%s): %s'%(err.errno,err.strerror)

        #--- compatibility: PDS MSB data is big-endian, PCs little-endian
        if sys.byteorder == 'little' and imageArrayType in ['H']:
            for row in image:
                row.byteswap()

        #--- finish with history list, header & header dictionary
        hList.append(note)
        if pfxCount > 0 or sfxCount > 0:
            hList.append(' prefix='+str(pfxCount)+', suffix='+str(sfxCount))
        imgObj = raster.rasterObjectClass(data=image,history=hList,
                                          caller=errPfx[:-2])
        imgObj.header = header
        if vals == []: imgObj.values = {}
        else: imgObj.values = pdsHeaderVals(headerLines,vals)
        return imgObj,''
    finally:
        f.close()
#end def parsePDS

#--- testing ---
if __name__ == '__main__':
    print("""
This module (pds.py) is not a standalone program.  It is a module that
contains a PDS file-reading function used by other scripts.
""")

### end module ###