Package fabmetheus_utilities :: Module xml_simple_reader
[hide private]
[frames | no frames]

Source Code for Module fabmetheus_utilities.xml_simple_reader

  1  """ 
  2  The xml_simple_reader.py script is an xml parser that can parse a line separated xml text. 
  3   
  4  This xml parser will read a line separated xml text and produce a tree of the xml with a root element.  Each element can have an attribute table, childNodes, a class name, parentNode, text and a link to the root element. 
  5   
  6  This example gets an xml tree for the xml file boolean.xml.  This example is run in a terminal in the folder which contains boolean.xml and xml_simple_reader.py. 
  7   
  8   
  9  > python 
 10  Python 2.5.1 (r251:54863, Sep 22 2007, 01:43:31) 
 11  [GCC 4.2.1 (SUSE Linux)] on linux2 
 12  Type "help", "copyright", "credits" or "license" for more information. 
 13  >>> fileName = 'boolean.xml' 
 14  >>> file = open(fileName, 'r') 
 15  >>> xmlText = file.read() 
 16  >>> file.close() 
 17  >>> from xml_simple_reader import XMLSimpleReader 
 18  >>> xmlParser = XMLSimpleReader(fileName, None, xmlText) 
 19  >>> print( xmlParser ) 
 20    ?xml, {'version': '1.0'} 
 21    ArtOfIllusion, {'xmlns:bf': '//babelfiche/codec', 'version': '2.0', 'fileversion': '3'} 
 22    Scene, {'bf:id': 'theScene'} 
 23    materials, {'bf:elem-type': 'java.lang.Object', 'bf:list': 'collection', 'bf:id': '1', 'bf:type': 'java.util.Vector'} 
 24  .. 
 25  many more lines of the xml tree 
 26  .. 
 27   
 28  """ 
 29   
 30  from fabmetheus_utilities.geometry.geometry_utilities import evaluate 
 31  from fabmetheus_utilities.geometry.geometry_utilities import matrix 
 32  from fabmetheus_utilities import archive 
 33  from fabmetheus_utilities import euclidean 
 34  from fabmetheus_utilities import xml_simple_writer 
 35  import StringIO 
 36   
 37   
 38  __author__ = 'Enrique Perez (perez_enrique@yahoo.com)' 
 39  __credits__ = 'Nophead <http://hydraraptor.blogspot.com/>\nArt of Illusion <http://www.artofillusion.org/>' 
 40  __date__ = '$Date: 2008/21/04 $' 
 41  __license__ = 'GNU Affero General Public License http://www.gnu.org/licenses/agpl.html' 
 42   
 43   
def addXMLLine(line, xmlLines):
    """Append the line to xmlLines, splitting off markup which follows a leading comment.

    If the line starts with a comment which is closed on the same line and is
    followed by more non-blank text, the comment and the stripped remainder
    are appended as two separate entries.  Otherwise the line is appended
    unchanged.
    """
    if line.strip().startswith('<!--'):
        commentEnd = line.find('-->')
        if commentEnd != -1:
            commentEnd += len('-->')
            trailingText = line[commentEnd:].strip()
            if trailingText:
                xmlLines.extend([line[:commentEnd], trailingText])
                return
    xmlLines.append(line)
58
def getXMLLines(text):
    """Get the xml lines of a text.

    A text line which opens a tag or comment without closing it is joined,
    with newlines, to the text lines which follow until one of them ends with
    the closing '>' (or '-->' when the opening line starts a comment).  Every
    resulting line is passed through addXMLLine and is finally split at tag
    boundaries by getXMLTagSplitLines.
    """
    combinedLines = []
    pendingLines = None  # text lines of a tag which is still open, None when no tag is open
    closingWord = '>'
    for textLine in archive.getTextLines(text):
        strippedLine = textLine.strip()
        if pendingLines is None:
            # A stripped line of fewer than two characters never opens a multi-line tag.
            opensTag = len(strippedLine) > 1 and strippedLine[0] == '<' and strippedLine[-1] != '>'
            if opensTag:
                pendingLines = [textLine]
                if strippedLine.startswith('<!--'):
                    closingWord = '-->'
            else:
                addXMLLine(textLine, combinedLines)
            continue
        pendingLines.append(textLine)
        if strippedLine.endswith(closingWord):
            addXMLLine('\n'.join(pendingLines), combinedLines)
            pendingLines = None
            closingWord = '>'
    xmlLines = []
    for combinedLine in combinedLines:
        xmlLines.extend(getXMLTagSplitLines(combinedLine))
    return xmlLines
90
91 -def getXMLTagSplitLines(combinedLine):
92 'Get the xml lines split at a tag.' 93 characterIndex = 0 94 lastWord = None 95 splitIndexes = [] 96 tagEnd = False 97 while characterIndex < len(combinedLine): 98 character = combinedLine[characterIndex] 99 if character == '"' or character == "'": 100 lastWord = character 101 elif combinedLine[characterIndex : characterIndex + len('<!--')] == '<!--': 102 lastWord = '-->' 103 elif combinedLine[characterIndex : characterIndex + len('<![CDATA[')] == '<![CDATA[': 104 lastWord = ']]>' 105 if lastWord != None: 106 characterIndex = combinedLine.find(lastWord, characterIndex + 1) 107 if characterIndex == -1: 108 return [combinedLine] 109 character = None 110 lastWord = None 111 if character == '>': 112 tagEnd = True 113 elif character == '<': 114 if tagEnd: 115 if combinedLine[characterIndex : characterIndex + 2] != '</': 116 splitIndexes.append(characterIndex) 117 characterIndex += 1 118 if len(splitIndexes) < 1: 119 return [combinedLine] 120 xmlTagSplitLines = [] 121 lastSplitIndex = 0 122 for splitIndex in splitIndexes: 123 xmlTagSplitLines.append(combinedLine[lastSplitIndex : splitIndex]) 124 lastSplitIndex = splitIndex 125 xmlTagSplitLines.append(combinedLine[lastSplitIndex :]) 126 return xmlTagSplitLines
127 128
class XMLElement:
    """An xml element, a node of the tree which XMLSimpleReader builds."""

    def __init__(self):
        """Create an unattached element with empty attributes, childNodes, lookup dictionaries and text."""
        self.attributeDictionary = {}
        self.childNodes = []
        self.idDictionary = {}
        self.importName = ''
        self.localName = ''
        self.nameDictionary = {}
        self.parentNode = None
        self.tagDictionary = {}
        self.text = ''
        self.xmlObject = None

    def __repr__(self):
        """Get the string representation of this XML element: local name, attributes and text."""
        return '%s\n%s\n%s' % (self.localName, self.attributeDictionary, self.text)

    def _getAccessibleAttribute(self, attributeName):
        """Get the attribute if its name is in globalGetAccessibleAttributeSet, otherwise None."""
        if attributeName in globalGetAccessibleAttributeSet:
            return getattr(self, attributeName, None)
        return None

    def addAttribute(self, beforeQuote, withinQuote):
        """Add the attribute to the dictionary.

        beforeQuote is the text in front of the quoted value; the key is the
        part of it before the last '='.  Nothing is added if there is no '='.
        """
        beforeQuote = beforeQuote.strip()
        lastEqualIndex = beforeQuote.rfind('=')
        if lastEqualIndex < 0:
            return
        key = beforeQuote[:lastEqualIndex].strip()
        self.attributeDictionary[key] = withinQuote

    def addSuffixToID(self, idSuffix):
        """Add the suffix to the id attribute, if there is one."""
        if 'id' in self.attributeDictionary:
            self.attributeDictionary['id'] += idSuffix

    def addToIdentifierDictionaryIFIdentifierExists(self):
        """Register this element in the id, name and tag dictionaries of the root, for each of those keys it has."""
        root = self.getRoot()
        if 'id' in self.attributeDictionary:
            idKey = self.getImportNameWithDot() + self.attributeDictionary['id']
            root.idDictionary[idKey] = self
        if 'name' in self.attributeDictionary:
            nameKey = self.getImportNameWithDot() + self.attributeDictionary['name']
            euclidean.addElementToListDictionaryIfNotThere(self, nameKey, root.nameDictionary)
        for tagKey in self.getTagKeys():
            euclidean.addElementToListDictionaryIfNotThere(self, tagKey, root.tagDictionary)

    def addXML(self, depth, output):
        """Add the xml for this element to the output; a comment is written as its raw text."""
        if self.localName == 'comment':
            output.write(self.text)
            return
        innerOutput = StringIO.StringIO()
        xml_simple_writer.addXMLFromObjects(depth + 1, self.childNodes, innerOutput)
        innerText = innerOutput.getvalue()
        xml_simple_writer.addBeginEndInnerXMLTag(self.attributeDictionary, depth, innerText, self.localName, output, self.text)

    def copyXMLChildNodes(self, idSuffix, parentNode):
        """Copy the childNodes of this element onto the parentNode."""
        for childNode in self.childNodes:
            childNode.getCopy(idSuffix, parentNode)

    def getCascadeFloat(self, defaultFloat, key):
        """Get the evaluated float for the key from this element or its nearest ancestor, else defaultFloat."""
        if key in self.attributeDictionary:
            value = evaluate.getEvaluatedFloat(None, key, self)
            if value is not None:
                return value
        if self.parentNode is None:
            return defaultFloat
        return self.parentNode.getCascadeFloat(defaultFloat, key)

    def getChildNodesByLocalName(self, localName):
        """Get the direct childNodes which have the given local name."""
        return [childNode for childNode in self.childNodes if localName == childNode.localName]

    def getChildNodesByLocalNameRecursively(self, localName):
        """Get the childNodes which have the given local name, from this element and all its descendants."""
        childNodesByLocalName = self.getChildNodesByLocalName(localName)
        for childNode in self.childNodes:
            childNodesByLocalName += childNode.getChildNodesByLocalNameRecursively(localName)
        return childNodesByLocalName

    def getCopy(self, idSuffix, parentNode):
        """Copy the xml element and its childNodes, add the copy to the parentNode and return it.

        The attributes of the copy are updated with the branch matrix
        attributes under the 'matrix.' prefix, and idSuffix is added to its id.
        """
        matrix4X4 = matrix.getBranchMatrixSetXMLElement(self)
        attributeDictionaryCopy = self.attributeDictionary.copy()
        attributeDictionaryCopy.update(matrix4X4.getAttributeDictionary('matrix.'))
        elementCopy = self.getCopyShallow(attributeDictionaryCopy)
        elementCopy.setParentAddToChildNodes(parentNode)
        elementCopy.addSuffixToID(idSuffix)
        elementCopy.text = self.text
        elementCopy.addToIdentifierDictionaryIFIdentifierExists()
        self.copyXMLChildNodes(idSuffix, elementCopy)
        return elementCopy

    def getCopyShallow(self, attributeDictionary=None):
        """Copy the local name, import name and parentNode into a new element which uses the given dictionary.

        The copy is not added to the childNodes of the parentNode.
        """
        if attributeDictionary is None:
            # A fresh dictionary per call; a mutable default argument would be shared between calls.
            attributeDictionary = {}
        copyShallow = XMLElement()
        copyShallow.attributeDictionary = attributeDictionary
        copyShallow.localName = self.localName
        copyShallow.importName = self.importName
        copyShallow.parentNode = self.parentNode
        return copyShallow

    def getFirstChildByLocalName(self, localName):
        """Get the first childNode which has the given local name, or None."""
        for childNode in self.childNodes:
            if localName == childNode.localName:
                return childNode
        return None

    def getIDSuffix(self, elementIndex=None):
        """Get the id suffix, '_' plus the id (or the local name if there is no id), plus '_elementIndex' if given."""
        suffix = self.localName
        if 'id' in self.attributeDictionary:
            suffix = self.attributeDictionary['id']
        if elementIndex is None:
            return '_%s' % suffix
        return '_%s_%s' % (suffix, elementIndex)

    def getImportNameWithDot(self):
        """Get the import name followed by a dot, or an empty string if there is no import name."""
        if self.importName == '':
            return ''
        return self.importName + '.'

    def getParentParseReplacedLine(self, line, lineStripped, parentNode):
        """Parse an xml line into this element and return the parentNode to use for the next line.

        lineStripped is the tag text which is parsed; line is only used as the
        text of a comment.
        A comment is added to the parentNode, which is returned.
        A closing tag adds nothing and returns the parent of the parentNode.
        A self closing tag, or a tag which is closed on the same line, is added
        to the parentNode, which is returned.
        Any other tag is added to the parentNode and this element is returned,
        so that the following lines become its childNodes.
        """
        if lineStripped.startswith('<!--'):
            self.localName = 'comment'
            self.text = line + '\n'
            self.setParentAddToChildNodes(parentNode)
            return parentNode
        if lineStripped.startswith('</'):
            if parentNode is None:
                return parentNode
            return parentNode.parentNode
        self.setParentAddToChildNodes(parentNode)
        cdataBeginIndex = lineStripped.find('<![CDATA[')
        if cdataBeginIndex != -1:
            cdataEndIndex = lineStripped.rfind(']]>')
            if cdataEndIndex != -1:
                cdataEndIndex += len(']]>')
                # The CDATA section, markers included, becomes the text and is cut out before the tag is parsed.
                self.text = lineStripped[cdataBeginIndex:cdataEndIndex]
                lineStripped = lineStripped[:cdataBeginIndex] + lineStripped[cdataEndIndex:]
        # The local name runs from after the '<' to the first space, '/>', '>' or newline.
        localNameEnd = lineStripped.replace('/>', ' ').replace('>', ' ').replace('\n', ' ').find(' ')
        self.localName = lineStripped[1:localNameEnd]
        lastWord = lineStripped[-2:]
        lineAfterLocalName = lineStripped[2 + len(self.localName):-1]
        beforeQuote = ''
        lastQuoteCharacter = None
        withinQuote = ''
        for character in lineAfterLocalName:
            if lastQuoteCharacter is None:
                if character == '"' or character == "'":
                    # An opening quote starts the value; the quote itself is not kept.
                    lastQuoteCharacter = character
                else:
                    beforeQuote += character
            elif character == lastQuoteCharacter:
                # The matching closing quote completes one key="value" pair.
                self.addAttribute(beforeQuote, withinQuote)
                beforeQuote = ''
                lastQuoteCharacter = None
                withinQuote = ''
            else:
                withinQuote += character
        self.addToIdentifierDictionaryIFIdentifierExists()
        if lastWord == '/>':
            return parentNode
        tagEnd = '</%s>' % self.localName
        if lineStripped.endswith(tagEnd):
            # The tag is closed on the same line, the text is what lies between the last '>' and the closing tag.
            untilTagEnd = lineStripped[:-len(tagEnd)]
            lastGreaterThanIndex = untilTagEnd.rfind('>')
            self.text += untilTagEnd[lastGreaterThanIndex + 1:]
            return parentNode
        return self

    def getParser(self):
        """Get the parser of the root element."""
        return self.getRoot().parser

    def getPaths(self):
        """Get the paths of the xmlObject, or an empty list if there is no xmlObject."""
        if self.xmlObject is None:
            return []
        return self.xmlObject.getPaths()

    def getPreviousVertex(self, defaultVector3=None):
        """Get the last vertex of the xmlObject of the parentNode if it exists, otherwise defaultVector3."""
        if self.parentNode is None:
            return defaultVector3
        if self.parentNode.xmlObject is None:
            return defaultVector3
        if len(self.parentNode.xmlObject.vertexes) < 1:
            return defaultVector3
        return self.parentNode.xmlObject.vertexes[-1]

    def getPreviousXMLElement(self):
        """Get the childNode in front of this one in the parentNode, or None if there is none."""
        if self.parentNode is None:
            return None
        previousXMLElementIndex = self.parentNode.childNodes.index(self) - 1
        if previousXMLElementIndex < 0:
            return None
        return self.parentNode.childNodes[previousXMLElementIndex]

    def getRoot(self):
        """Get the root element, the ancestor which has no parentNode."""
        if self.parentNode is None:
            return self
        return self.parentNode.getRoot()

    def getSubChildWithID(self, idReference):
        """Get the first descendant whose 'bf:id' attribute equals the idReference, searching depth first, or None."""
        for childNode in self.childNodes:
            if 'bf:id' in childNode.attributeDictionary:
                if childNode.attributeDictionary['bf:id'] == idReference:
                    return childNode
            subChildWithID = childNode.getSubChildWithID(idReference)
            if subChildWithID is not None:
                return subChildWithID
        return None

    def getTagKeys(self):
        """Get the stripped, non empty, comma separated words of the 'tags' attribute.

        One leading '=', one leading '[' and one trailing ']' are removed
        from the attribute before it is split.
        """
        if 'tags' not in self.attributeDictionary:
            return []
        tagString = self.attributeDictionary['tags']
        if tagString.startswith('='):
            tagString = tagString[1:]
        if tagString.startswith('['):
            tagString = tagString[1:]
        if tagString.endswith(']'):
            tagString = tagString[:-1]
        strippedWords = [tagWord.strip() for tagWord in tagString.split(',')]
        return [tagKey for tagKey in strippedWords if tagKey != '']

    def getValueByKey(self, key):
        """Get the value by the key, from the global element value functions first, then from the attributes."""
        if key in evaluate.globalElementValueDictionary:
            return evaluate.globalElementValueDictionary[key](self)
        if key in self.attributeDictionary:
            return evaluate.getEvaluatedLinkValue(self.attributeDictionary[key], self)
        return None

    def getVertexes(self):
        """Get the vertexes of the xmlObject, or an empty list if there is no xmlObject."""
        if self.xmlObject is None:
            return []
        return self.xmlObject.getVertexes()

    def getXMLElementByID(self, idKey):
        """Get the xml element by id from the id dictionary of the root, or None."""
        idDictionary = self.getRoot().idDictionary
        if idKey in idDictionary:
            return idDictionary[idKey]
        return None

    def getXMLElementByImportID(self, idKey):
        """Get the xml element by import name and id."""
        return self.getXMLElementByID(self.getImportNameWithDot() + idKey)

    def getXMLElementsByImportName(self, name):
        """Get the xml elements by import name and name."""
        return self.getXMLElementsByName(self.getImportNameWithDot() + name)

    def getXMLElementsByName(self, name):
        """Get the xml elements by name from the name dictionary of the root, or None."""
        nameDictionary = self.getRoot().nameDictionary
        if name in nameDictionary:
            return nameDictionary[name]
        return None

    def getXMLElementsByTag(self, tag):
        """Get the xml elements by tag from the tag dictionary of the root, or None."""
        tagDictionary = self.getRoot().tagDictionary
        if tag in tagDictionary:
            return tagDictionary[tag]
        return None

    def getXMLProcessor(self):
        """Get the xmlProcessor of the root element."""
        return self.getRoot().xmlProcessor

    def linkObject(self, xmlObject):
        """Link self to xmlObject and add xmlObject to the archivableObjects of the xmlObject of the parentNode."""
        self.xmlObject = xmlObject
        self.xmlObject.xmlElement = self
        self.parentNode.xmlObject.archivableObjects.append(self.xmlObject)

    def printAllVariables(self):
        """Print every instance variable, each preceded by its name."""
        print('attributeDictionary')
        print(self.attributeDictionary)
        print('childNodes')
        print(self.childNodes)
        print('idDictionary')
        print(self.idDictionary)
        print('importName')
        print(self.importName)
        print('localName')
        print(self.localName)
        print('nameDictionary')
        print(self.nameDictionary)
        print('parentNode')
        print(self.parentNode)
        print('tagDictionary')
        print(self.tagDictionary)
        print('text')
        print(self.text)
        print('xmlObject')
        print(self.xmlObject)
        print('')

    def printAllVariablesRoot(self):
        """Print all variables, and the variables of the root if this is not the root."""
        self.printAllVariables()
        root = self.getRoot()
        if root is not None and root is not self:
            print('')
            print('Root variables:')
            root.printAllVariables()

    def removeChildNodesFromIDNameParent(self):
        """Remove the childNodes from the id, name and tag dictionaries and from the childNodes."""
        # Iterate over a copy because each removal mutates self.childNodes.
        for childNode in self.childNodes[:]:
            childNode.removeFromIDNameParent()

    def removeFromIDNameParent(self):
        """Remove this and its descendants from the id, name and tag dictionaries and from the childNodes of the parentNode."""
        self.removeChildNodesFromIDNameParent()
        root = self.getRoot()
        if 'id' in self.attributeDictionary:
            idKey = self.getImportNameWithDot() + self.attributeDictionary['id']
            if idKey in root.idDictionary:
                del root.idDictionary[idKey]
        if 'name' in self.attributeDictionary:
            nameKey = self.getImportNameWithDot() + self.attributeDictionary['name']
            euclidean.removeElementFromListTable(self, nameKey, root.nameDictionary)
        for tagKey in self.getTagKeys():
            euclidean.removeElementFromListTable(self, tagKey, root.tagDictionary)
        if self.parentNode is not None:
            self.parentNode.childNodes.remove(self)

    def setParentAddToChildNodes(self, parentNode):
        """Set the parentNode and, unless it is None, add this to its childNodes."""
        self.parentNode = parentNode
        if self.parentNode is not None:
            self.parentNode.childNodes.append(self)
class XMLSimpleReader:
    """A simple xml parser which builds a tree of XMLElement nodes from xml text."""

    def __init__(self, fileName, parentNode, xmlText):
        """Split the xmlText into xml lines and parse each of them.

        If a parentNode is given, its root is used as the root of this parser
        and the parsed elements are added below the parentNode.
        """
        self.beforeRoot = ''
        self.fileName = fileName
        self.isXML = False
        self.numberOfWarnings = 0
        self.parentNode = parentNode
        self.root = None if parentNode is None else parentNode.getRoot()
        self.lines = getXMLLines(xmlText)
        for lineIndex, xmlLine in enumerate(self.lines):
            # parseLine reads self.lineIndex when it finds the root.
            self.lineIndex = lineIndex
            self.parseLine(xmlLine)
        self.xmlText = xmlText

    def __repr__(self):
        """Get the string representation of this parser, which is that of the root."""
        return str(self.root)

    def getOriginalRoot(self):
        """Get the root of a fresh parse of the original text, or None if the root evaluates 'getOriginalRoot' to false."""
        if not evaluate.getEvaluatedBoolean(True, 'getOriginalRoot', self.root):
            return None
        return XMLSimpleReader(self.fileName, self.parentNode, self.xmlText).root

    def getRoot(self):
        """Get the root element."""
        return self.root

    def parseLine(self, line):
        """Parse an xml line and add it to the xml tree.

        Nothing is parsed until a line starting with '<?xml' has been seen; a
        warning is printed once for the first line which is ignored this way.
        The first element which is neither a comment nor a doctype becomes the
        root, and the lines in front of it are kept in beforeRoot.
        """
        lineStripped = line.strip()
        if not lineStripped:
            return
        if lineStripped.startswith('<?xml'):
            self.isXML = True
            return
        if not self.isXML:
            if self.numberOfWarnings < 1:
                print('Warning, xml file should start with <?xml.')
                print('Until it does, parseLine in XMLSimpleReader will do nothing for:')
                print(self.fileName)
                self.numberOfWarnings += 1
            return
        xmlElement = XMLElement()
        self.parentNode = xmlElement.getParentParseReplacedLine(line, lineStripped, self.parentNode)
        if self.root is not None:
            return
        if xmlElement.localName.lower() in ('comment', '!doctype'):
            return
        self.root = xmlElement
        self.root.parser = self
        precedingLines = self.lines[:self.lineIndex]
        self.beforeRoot += ''.join([precedingLine + '\n' for precedingLine in precedingLines])
# The attribute names which XMLElement._getAccessibleAttribute will look up; any other name gets None.
globalGetAccessibleAttributeSet = set()
globalGetAccessibleAttributeSet.update('getPaths getPreviousVertex getPreviousXMLElement getVertexes parent'.split())