1 """
2 The xml_simple_reader.py script is an xml parser that can parse a line-separated xml text.
3
4 This xml parser will read a line-separated xml text and produce a tree of the xml with a root element. Each element can have an attribute table, childNodes, a class name, parentNode, text and a link to the root element.
5
6 This example gets an xml tree for the xml file boolean.xml. This example is run in a terminal in the folder which contains boolean.xml and xml_simple_reader.py.
7
8
9 > python
10 Python 2.5.1 (r251:54863, Sep 22 2007, 01:43:31)
11 [GCC 4.2.1 (SUSE Linux)] on linux2
12 Type "help", "copyright", "credits" or "license" for more information.
13 >>> fileName = 'boolean.xml'
14 >>> file = open(fileName, 'r')
15 >>> xmlText = file.read()
16 >>> file.close()
17 >>> from xml_simple_reader import XMLSimpleReader
18 >>> xmlParser = XMLSimpleReader(fileName, None, xmlText)
19 >>> print( xmlParser )
20 ?xml, {'version': '1.0'}
21 ArtOfIllusion, {'xmlns:bf': '//babelfiche/codec', 'version': '2.0', 'fileversion': '3'}
22 Scene, {'bf:id': 'theScene'}
23 materials, {'bf:elem-type': 'java.lang.Object', 'bf:list': 'collection', 'bf:id': '1', 'bf:type': 'java.util.Vector'}
24 ..
25 many more lines of the xml tree
26 ..
27
28 """
29
30 from fabmetheus_utilities.geometry.geometry_utilities import evaluate
31 from fabmetheus_utilities.geometry.geometry_utilities import matrix
32 from fabmetheus_utilities import archive
33 from fabmetheus_utilities import euclidean
34 from fabmetheus_utilities import xml_simple_writer
35 import StringIO
36
37
38 __author__ = 'Enrique Perez (perez_enrique@yahoo.com)'
39 __credits__ = 'Nophead <http://hydraraptor.blogspot.com/>\nArt of Illusion <http://www.artofillusion.org/>'
40 __date__ = '$Date: 2008/21/04 $'
41 __license__ = 'GNU Affero General Public License http://www.gnu.org/licenses/agpl.html'
42
43
45 'Get the all the xml lines of a text.'
46 strippedLine = line.strip()
47 if strippedLine[ : len('<!--') ] == '<!--':
48 endIndex = line.find('-->')
49 if endIndex != - 1:
50 endIndex += len('-->')
51 commentLine = line[: endIndex]
52 remainderLine = line[endIndex :].strip()
53 if len(remainderLine) > 0:
54 xmlLines.append(commentLine)
55 xmlLines.append(remainderLine)
56 return
57 xmlLines.append(line)
58
60 'Get the all the xml lines of a text.'
61 accumulatedOutput = None
62 textLines = archive.getTextLines(text)
63 combinedLines = []
64 lastWord = '>'
65 for textLine in textLines:
66 strippedLine = textLine.strip()
67 firstCharacter = None
68 lastCharacter = None
69 if len( strippedLine ) > 1:
70 firstCharacter = strippedLine[0]
71 lastCharacter = strippedLine[-1]
72 if firstCharacter == '<' and lastCharacter != '>' and accumulatedOutput == None:
73 accumulatedOutput = StringIO.StringIO()
74 accumulatedOutput.write( textLine )
75 if strippedLine[ : len('<!--') ] == '<!--':
76 lastWord = '-->'
77 else:
78 if accumulatedOutput == None:
79 addXMLLine( textLine, combinedLines )
80 else:
81 accumulatedOutput.write('\n' + textLine )
82 if strippedLine[ - len( lastWord ) : ] == lastWord:
83 addXMLLine( accumulatedOutput.getvalue(), combinedLines )
84 accumulatedOutput = None
85 lastWord = '>'
86 xmlLines = []
87 for combinedLine in combinedLines:
88 xmlLines += getXMLTagSplitLines(combinedLine)
89 return xmlLines
90
127
128
130 'An xml element.'
132 'Add empty lists.'
133 self.attributeDictionary = {}
134 self.childNodes = []
135 self.idDictionary = {}
136 self.importName = ''
137 self.localName = ''
138 self.nameDictionary = {}
139 self.parentNode = None
140 self.tagDictionary = {}
141 self.text = ''
142 self.xmlObject = None
143
145 'Get the string representation of this XML element.'
146 return '%s\n%s\n%s' % ( self.localName, self.attributeDictionary, self.text )
147
154
156 'Add the attribute to the dictionary.'
157 beforeQuote = beforeQuote.strip()
158 lastEqualIndex = beforeQuote.rfind('=')
159 if lastEqualIndex < 0:
160 return
161 key = beforeQuote[ : lastEqualIndex ].strip()
162 self.attributeDictionary[key] = withinQuote
163
165 'Add the suffix to the id.'
166 if 'id' in self.attributeDictionary:
167 self.attributeDictionary['id'] += idSuffix
168
179
def addXML(self, depth, output):
    """Write the xml of this element to output.

    depth is the nesting depth handed to the xml_simple_writer helpers and
    output is a writable, file-like object.  An element whose localName is
    'comment' writes its stored text unchanged and nothing else.
    """
    isComment = self.localName == 'comment'
    if not isComment:
        # The childNodes are rendered one level deeper into a scratch buffer,
        # then passed as the inner text of the tag of this element.
        childOutput = StringIO.StringIO()
        xml_simple_writer.addXMLFromObjects(depth + 1, self.childNodes, childOutput)
        xml_simple_writer.addBeginEndInnerXMLTag(
            self.attributeDictionary,
            depth,
            childOutput.getvalue(),
            self.localName,
            output,
            self.text)
        return
    output.write(self.text)
189
191 'Copy the xml childNodes.'
192 for childNode in self.childNodes:
193 childNode.getCopy( idSuffix, parentNode )
194
196 'Get the cascade float.'
197 if key in self.attributeDictionary:
198 value = evaluate.getEvaluatedFloat(None, key, self)
199 if value != None:
200 return value
201 if self.parentNode == None:
202 return defaultFloat
203 return self.parentNode.getCascadeFloat(defaultFloat, key)
204
206 'Get the childNodes which have the given class name.'
207 childNodesByLocalName = []
208 for childNode in self.childNodes:
209 if localName == childNode.localName:
210 childNodesByLocalName.append(childNode)
211 return childNodesByLocalName
212
214 'Get the childNodes which have the given class name recursively.'
215 childNodesByLocalName = self.getChildNodesByLocalName(localName)
216 for childNode in self.childNodes:
217 childNodesByLocalName += childNode.getChildNodesByLocalNameRecursively(localName)
218 return childNodesByLocalName
219
220 - def getCopy(self, idSuffix, parentNode):
232
234 'Copy the xml element and set its dictionary and parentNode.'
235 if attributeDictionary == None:
236 attributeDictionary = {}
237 copyShallow = XMLElement()
238 copyShallow.attributeDictionary = attributeDictionary
239 copyShallow.localName = self.localName
240 copyShallow.importName = self.importName
241 copyShallow.parentNode = self.parentNode
242 return copyShallow
243
245 'Get the first childNode which has the given class name.'
246 for childNode in self.childNodes:
247 if localName == childNode.localName:
248 return childNode
249 return None
250
252 'Get the id suffix from the dictionary.'
253 suffix = self.localName
254 if 'id' in self.attributeDictionary:
255 suffix = self.attributeDictionary['id']
256 if elementIndex == None:
257 return '_%s' % suffix
258 return '_%s_%s' % (suffix, elementIndex)
259
261 'Get import name with dot.'
262 if self.importName == '':
263 return ''
264 return self.importName + '.'
265
267 'Parse replaced line and return the parentNode.'
268 if lineStripped[: len('<!--')] == '<!--':
269 self.localName = 'comment'
270 self.text = line + '\n'
271 self.setParentAddToChildNodes(parentNode)
272 return parentNode
273 if lineStripped[: len('</')] == '</':
274 if parentNode == None:
275 return parentNode
276 return parentNode.parentNode
277 self.setParentAddToChildNodes(parentNode)
278 cdataBeginIndex = lineStripped.find('<![CDATA[')
279 if cdataBeginIndex != - 1:
280 cdataEndIndex = lineStripped.rfind(']]>')
281 if cdataEndIndex != - 1:
282 cdataEndIndex += len(']]>')
283 self.text = lineStripped[cdataBeginIndex : cdataEndIndex]
284 lineStripped = lineStripped[: cdataBeginIndex] + lineStripped[cdataEndIndex :]
285 self.localName = lineStripped[1 : lineStripped.replace('/>', ' ').replace('>', ' ').replace('\n', ' ').find(' ')]
286 lastWord = lineStripped[-2 :]
287 lineAfterLocalName = lineStripped[2 + len(self.localName) : -1]
288 beforeQuote = ''
289 lastQuoteCharacter = None
290 withinQuote = ''
291 for characterIndex in xrange(len(lineAfterLocalName)):
292 character = lineAfterLocalName[characterIndex]
293 if lastQuoteCharacter == None:
294 if character == '"' or character == "'":
295 lastQuoteCharacter = character
296 character = ''
297 if character == lastQuoteCharacter:
298 self.addAttribute(beforeQuote, withinQuote)
299 beforeQuote = ''
300 lastQuoteCharacter = None
301 withinQuote = ''
302 character = ''
303 if lastQuoteCharacter == None:
304 beforeQuote += character
305 else:
306 withinQuote += character
307 self.addToIdentifierDictionaryIFIdentifierExists()
308 if lastWord == '/>':
309 return parentNode
310 tagEnd = '</%s>' % self.localName
311 if lineStripped[-len(tagEnd) :] == tagEnd:
312 untilTagEnd = lineStripped[: -len(tagEnd)]
313 lastGreaterThanIndex = untilTagEnd.rfind('>')
314 self.text += untilTagEnd[ lastGreaterThanIndex + 1 : ]
315 return parentNode
316 return self
317
319 'Get the parser.'
320 return self.getRoot().parser
321
323 'Get all paths.'
324 if self.xmlObject == None:
325 return []
326 return self.xmlObject.getPaths()
327
329 'Get previous vertex if it exists.'
330 if self.parentNode == None:
331 return defaultVector3
332 if self.parentNode.xmlObject == None:
333 return defaultVector3
334 if len(self.parentNode.xmlObject.vertexes) < 1:
335 return defaultVector3
336 return self.parentNode.xmlObject.vertexes[-1]
337
339 'Get previous XMLElement if it exists.'
340 if self.parentNode == None:
341 return None
342 previousXMLElementIndex = self.parentNode.childNodes.index(self) - 1
343 if previousXMLElementIndex < 0:
344 return None
345 return self.parentNode.childNodes[previousXMLElementIndex]
346
348 'Get the root element.'
349 if self.parentNode == None:
350 return self
351 return self.parentNode.getRoot()
352
354 'Get the childNode which has the idReference.'
355 for childNode in self.childNodes:
356 if 'bf:id' in childNode.attributeDictionary:
357 if childNode.attributeDictionary['bf:id'] == idReference:
358 return childNode
359 subChildWithID = childNode.getSubChildWithID( idReference )
360 if subChildWithID != None:
361 return subChildWithID
362 return None
363
365 'Get stripped tag keys.'
366 if 'tags' not in self.attributeDictionary:
367 return []
368 tagKeys = []
369 tagString = self.attributeDictionary['tags']
370 if tagString.startswith('='):
371 tagString = tagString[1 :]
372 if tagString.startswith('['):
373 tagString = tagString[1 :]
374 if tagString.endswith(']'):
375 tagString = tagString[: -1]
376 for tagWord in tagString.split(','):
377 tagKey = tagWord.strip()
378 if tagKey != '':
379 tagKeys.append(tagKey)
380 return tagKeys
381
389
391 'Get the vertexes.'
392 if self.xmlObject == None:
393 return []
394 return self.xmlObject.getVertexes()
395
397 'Get the xml element by id.'
398 idDictionary = self.getRoot().idDictionary
399 if idKey in idDictionary:
400 return idDictionary[idKey]
401 return None
402
406
410
412 'Get the xml elements by name.'
413 nameDictionary = self.getRoot().nameDictionary
414 if name in nameDictionary:
415 return nameDictionary[name]
416 return None
417
419 'Get the xml elements by tag.'
420 tagDictionary = self.getRoot().tagDictionary
421 if tag in tagDictionary:
422 return tagDictionary[tag]
423 return None
424
426 'Get the xmlProcessor.'
427 return self.getRoot().xmlProcessor
428
430 'Link self to xmlObject and add xmlObject to archivableObjects.'
431 self.xmlObject = xmlObject
432 self.xmlObject.xmlElement = self
433 self.parentNode.xmlObject.archivableObjects.append(self.xmlObject)
434
436 'Print all variables.'
437 print('attributeDictionary')
438 print(self.attributeDictionary)
439 print('childNodes')
440 print(self.childNodes)
441 print('idDictionary')
442 print(self.idDictionary)
443 print('importName')
444 print(self.importName)
445 print('localName')
446 print(self.localName)
447 print('nameDictionary')
448 print(self.nameDictionary)
449 print('parentNode')
450 print(self.parentNode)
451 print('tagDictionary')
452 print(self.tagDictionary)
453 print('text')
454 print(self.text)
455 print('xmlObject')
456 print(self.xmlObject)
457 print('')
458
460 'Print all variables and the root variables.'
461 self.printAllVariables()
462 root = self.getRoot()
463 if root != None and root != self:
464 print('')
465 print('Root variables:')
466 root.printAllVariables()
467
469 'Remove the childNodes from the id and name dictionaries and the childNodes.'
470 childNodesCopy = self.childNodes[:]
471 for childNode in childNodesCopy:
472 childNode.removeFromIDNameParent()
473
490
492 'Set the parentNode and add this to its childNodes.'
493 self.parentNode = parentNode
494 if self.parentNode != None:
495 self.parentNode.childNodes.append(self)
496
497
499 'A simple xml parser.'
def __init__(self, fileName, parentNode, xmlText):
    """Set up the reader state, then parse xmlText line by line.

    fileName is stored and printed in the warning for text without an xml
    header.  parentNode is the element under which parsing starts, or None
    to start a new tree.  xmlText is the xml text to be parsed.
    """
    self.fileName = fileName
    self.parentNode = parentNode
    self.beforeRoot = ''
    self.isXML = False
    self.numberOfWarnings = 0
    # With a parentNode the root of its tree is reused, otherwise the root
    # stays None until parsing assigns it.
    self.root = None
    if parentNode != None:
        self.root = parentNode.getRoot()
    self.lines = getXMLLines(xmlText)
    for lineIndex, line in enumerate(self.lines):
        # The index is kept on the reader because the parsing code slices
        # self.lines with self.lineIndex to collect the lines before the root.
        self.lineIndex = lineIndex
        self.parseLine(line)
    # The text is stored only after parsing has finished.
    self.xmlText = xmlText
514
516 'Get the string representation of this parser.'
517 return str( self.root )
518
520 'Get the original reparsed root element.'
521 if evaluate.getEvaluatedBoolean(True, 'getOriginalRoot', self.root):
522 return XMLSimpleReader(self.fileName, self.parentNode, self.xmlText).root
523 return None
524
526 'Get the root element.'
527 return self.root
528
530 'Parse an xml line and add it to the xml tree.'
531 lineStripped = line.strip()
532 if len( lineStripped ) < 1:
533 return
534 if lineStripped.startswith('<?xml'):
535 self.isXML = True
536 return
537 if not self.isXML:
538 if self.numberOfWarnings < 1:
539 print('Warning, xml file should start with <?xml.')
540 print('Until it does, parseLine in XMLSimpleReader will do nothing for:')
541 print(self.fileName)
542 self.numberOfWarnings += 1
543 return
544 xmlElement = XMLElement()
545 self.parentNode = xmlElement.getParentParseReplacedLine( line, lineStripped, self.parentNode )
546 if self.root != None:
547 return
548 lowerLocalName = xmlElement.localName.lower()
549 if lowerLocalName == 'comment' or lowerLocalName == '!doctype':
550 return
551 self.root = xmlElement
552 self.root.parser = self
553 for line in self.lines[ : self.lineIndex ]:
554 self.beforeRoot += line + '\n'
555
556
# Set of accessible attribute names, built once when the module is imported.
# NOTE(review): presumably consulted by the evaluation code when resolving
# attributes of an xml element; confirm against the callers.
globalGetAccessibleAttributeSet = set([
    'getPaths',
    'getPreviousVertex',
    'getPreviousXMLElement',
    'getVertexes',
    'parent',
])
558