|
7 | 7 |
|
8 | 8 | fromhtml5lib.constantsimportvoidElements
|
9 | 9 |
|
class Root(object):
    """Document-level node wrapping an ElementTree-like object.

    ``children`` holds, in document order, an optional ``Doctype`` (added
    only when ``et.docinfo.internalDTD`` is truthy) followed by the root
    element and every sibling reachable from it via ``getprevious()`` /
    ``getnext()``.  ``et`` is presumably an lxml ElementTree -- confirm
    against callers.
    """

    def __init__(self, et):
        """Collect the top-level nodes of *et* into ``self.children``."""
        self.elementtree = et
        self.children = []

        docinfo = et.docinfo
        if docinfo.internalDTD:
            self.children.append(Doctype(self, docinfo.root_name,
                                         docinfo.public_id,
                                         docinfo.system_url))

        node = et.getroot()
        # A tree without a root element yields no element children instead
        # of failing with AttributeError on ``None.getprevious()``.
        if node is not None:
            # Rewind to the first top-level sibling (anything that precedes
            # the root element), then walk forward collecting every sibling.
            previous = node.getprevious()
            while previous is not None:
                node = previous
                previous = node.getprevious()
            while node is not None:
                self.children.append(node)
                node = node.getnext()

        # The document node itself carries no text.
        self.text = None
        self.tail = None

    def __getitem__(self, key):
        """Return the top-level child at position (or slice) *key*."""
        return self.children[key]

    def getnext(self):
        """Return None: the document node has no following sibling."""
        return None
| 35 | + |
class Doctype(object):
    """Synthetic doctype node stored among a root node's children.

    Holds the doctype's name, public identifier and system identifier,
    plus a back-reference to the root node that owns it.
    """

    def __init__(self, root_node, name, public_id, system_id):
        """Record the owning *root_node* and the three doctype fields."""
        self.root_node = root_node
        self.name = name
        self.public_id = public_id
        self.system_id = system_id

        # A doctype carries no text.
        self.text = None
        self.tail = None

    def getnext(self):
        """Return the root's second child, or None if there is none.

        The doctype is taken to sit at index 0 of ``root_node.children``,
        so its next sibling is index 1.  When no such child exists, None
        is returned rather than raising IndexError, matching how the other
        ``getnext`` implementations signal "no next sibling".
        """
        siblings = self.root_node.children
        if len(siblings) > 1:
            return siblings[1]
        return None
| 48 | + |
class FragmentRoot(Root):
    """Root node for a fragment supplied as a sequence of top-level items.

    ``Root.__init__`` is not called here; only ``children``, ``text`` and
    ``tail`` are set on the instance.
    """

    def __init__(self, children):
        """Wrap each item of *children* in a ``FragmentWrapper``."""
        wrapped = []
        for item in children:
            wrapped.append(FragmentWrapper(self, item))
        self.children = wrapped

        # The fragment root itself carries no text.
        self.text = None
        self.tail = None

    def getnext(self):
        """Return None: a fragment root has no following sibling."""
        return None
| 56 | + |
class FragmentWrapper(object):
    """Wrap one top-level fragment item so it can be treated like a node.

    The wrapper copies the item's ``text`` / ``tail`` (None when absent),
    records whether the item is a plain string in ``isstring``, finds its
    siblings through the owning fragment root, and forwards every other
    attribute lookup to the wrapped object.
    """

    # ``basestring`` only exists on Python 2.  Fall back to the Python 3
    # text and bytes types so construction does not raise NameError there.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str, bytes)

    def __init__(self, fragment_root, obj):
        """Wrap *obj*, a child of *fragment_root*."""
        self.root_node = fragment_root
        self.obj = obj
        self.text = getattr(obj, "text", None)
        self.tail = getattr(obj, "tail", None)
        self.isstring = isinstance(obj, self._string_types)

    def __getattr__(self, name):
        """Forward lookups of attributes not set on the wrapper to ``obj``."""
        # Only reached when normal lookup fails.  If ``obj`` itself is the
        # missing attribute (instance built without __init__, e.g. by
        # copy), reading ``self.obj`` below would recurse without bound.
        if name == "obj":
            raise AttributeError(name)
        return getattr(self.obj, name)

    def getnext(self):
        """Return the following sibling wrapper, or None for the last one.

        Raises ValueError if this wrapper is not among
        ``root_node.children``.
        """
        siblings = self.root_node.children
        following = siblings.index(self) + 1
        if following < len(siblings):
            return siblings[following]
        return None

    def __getitem__(self, key):
        """Index into the wrapped object."""
        return self.obj[key]

    def __nonzero__(self):
        """Truthiness of the wrapper is the truthiness of the wrapped object."""
        return bool(self.obj)

    # Python 3 consults __bool__ rather than __nonzero__.
    __bool__ = __nonzero__

    def getparent(self):
        """Return None: wrapped fragment items report no parent."""
        return None

    def __str__(self):
        """Return ``str()`` of the wrapped object."""
        return str(self.obj)
| 93 | + |
| 94 | + |
10 | 95 | classTreeWalker(_base.NonRecursiveTreeWalker):
|
| 96 | +def__init__(self,tree): |
| 97 | +ifhasattr(tree,"getroot"): |
| 98 | +tree=Root(tree) |
| 99 | +elifisinstance(tree,list): |
| 100 | +tree=FragmentRoot(tree) |
| 101 | +_base.NonRecursiveTreeWalker.__init__(self,tree) |
11 | 102 | defgetNodeDetails(self,node):
|
12 | 103 | ifisinstance(node,tuple):# Text node
|
13 | 104 | node,key=node
|
14 | 105 | assertkeyin ("text","tail"),_("Text nodes are text or tail, found %s")%key
|
15 | 106 | return_base.TEXT,getattr(node,key)
|
16 | 107 |
|
17 |
| -ifnot(hasattr(node,"tag")): |
18 |
| -node=node.getroot() |
19 |
| - |
20 |
| -ifnode.tagin ("<DOCUMENT_ROOT>","<DOCUMENT_FRAGMENT>"): |
| 108 | +elifisinstance(node,Root): |
21 | 109 | return (_base.DOCUMENT,)
|
22 | 110 |
|
23 |
| -elifnode.tag=="<!DOCTYPE>": |
24 |
| -return_base.DOCTYPE,node.text |
| 111 | +elifisinstance(node,Doctype): |
| 112 | +return_base.DOCTYPE,node.name,node.public_id,node.system_id |
| 113 | + |
| 114 | +elifisinstance(node,FragmentWrapper)andnode.isstring: |
| 115 | +return_base.TEXT,node |
25 | 116 |
|
26 | 117 | elifnode.tag==etree.Comment:
|
27 | 118 | return_base.COMMENT,node.text
|
|