31
31
// XERROR - with regard to parse errors
32
32
// XSCRIPT - with regard to scripting mode
33
33
// XENCODING - with regard to encoding (for reparsing tests)
34
+ // XSKETCHY - godawful workarounds
34
35
35
36
class HTML5_TreeBuilder {
36
37
public $ stack =array ();
@@ -128,6 +129,15 @@ private function strConst($number) {
128
129
const NS_XML ='http://www.w3.org/XML/1998/namespace ' ;
129
130
const NS_XMLNS ='http://www.w3.org/2000/xmlns/ ' ;
130
131
132
// Lookup table keyed by the namespace URI constants of this class; each
// value is the string that insertForeignElement() prepends to an
// attribute's local name before calling setAttribute(). The HTML
// namespace maps to an empty string, so HTML attributes get no prefix.
// NOTE(review): in the visible insertForeignElement() code the NS_XML
// case is handled separately through setAttributeNS(), so the NS_XML
// entry does not appear to be read on that path - confirm other users.
+ public $ nsToPrefix =array (
133
+ self ::NS_HTML =>'' ,
134
+ self ::NS_MATHML =>'math: ' ,
135
+ self ::NS_SVG =>'svg: ' ,
136
+ self ::NS_XLINK =>'xlink: ' ,
137
+ self ::NS_XML =>'xml: ' ,
138
+ self ::NS_XMLNS =>'xmlns: ' ,
139
+ );
140
+
131
141
public function __construct () {
132
142
$ this ->mode =self ::INITIAL ;
133
143
$ this ->dom =new DOMDocument ;
@@ -140,7 +150,6 @@ public function __construct() {
140
150
141
151
// Process tag tokens
142
152
public function emitToken ($ token ,$ mode =null ) {
143
- // XXX: ignore parse errors... why are we emitting them, again?
144
153
if ($ token ['type ' ] === HTML5_Tokenizer::PARSEERROR )return ;
145
154
if ($ mode ===null )$ mode =$ this ->mode ;
146
155
@@ -195,9 +204,6 @@ public function emitToken($token, $mode = null) {
195
204
* doctype attribute of the Document object. */
196
205
if (!isset ($ token ['public ' ]))$ token ['public ' ] =null ;
197
206
if (!isset ($ token ['system ' ]))$ token ['system ' ] =null ;
198
- // Yes this is hacky. I'm kind of annoyed that I can't appendChild
199
- // a doctype to DOMDocument. Maybe I haven't chanted the right
200
- // syllables.
201
207
$ impl =new DOMImplementation ();
202
208
// This call can fail for particularly pathological cases (namely,
203
209
// the qualifiedName parameter ($token['name']) could be missing.
@@ -1753,7 +1759,7 @@ public function emitToken($token, $mode = null) {
1753
1759
* elements with an entry for the new element, and
1754
1760
* let node be the new element. */
1755
1761
// we don't know what the token is anymore
1756
- $ clone =$ node ->cloneNode ();
1762
+ $ clone =$ this ->cloneNode ($ node );
1757
1763
$ a_pos =array_search ($ node ,$ this ->a_formatting ,true );
1758
1764
$ s_pos =array_search ($ node ,$ this ->stack ,true );
1759
1765
$ this ->a_formatting [$ a_pos ] =$ clone ;
@@ -1794,7 +1800,7 @@ public function emitToken($token, $mode = null) {
1794
1800
1795
1801
/* 8. Create an element for the token for which the
1796
1802
* formatting element was created. */
1797
- $ clone =$ formatting_element ->cloneNode ();
1803
+ $ clone =$ this ->cloneNode ($ formatting_element );
1798
1804
1799
1805
/* 9. Take all of the child nodes of the furthest
1800
1806
block and append them to the element created in the
@@ -3177,7 +3183,7 @@ private function reconstructActiveFormattingElements() {
3177
3183
}
3178
3184
3179
3185
/* 8. Perform a shallow clone of the element entry to obtain clone. */
3180
- $ clone =$ entry ->cloneNode ();
3186
+ $ clone =$ this ->cloneNode ($ entry );
3181
3187
3182
3188
/* 9. Append clone to the current node and push it onto the stack
3183
3189
of open elements so that it is the new current node. */
@@ -3672,22 +3678,25 @@ public function insertForeignElement($token, $namespaceURI) {
3672
3678
if (!empty ($ token ['attr ' ])) {
3673
3679
foreach ($ token ['attr ' ]as $ kp ) {
3674
3680
$ attr =$ kp ['name ' ];
3681
+ // XSKETCHY: this entire thing is a hack to get around
3682
+ // DOM's really bad XML implementation
3675
3683
if (is_array ($ attr )) {
3676
3684
$ ns =$ attr [2 ];
3677
3685
$ attr =$ attr [1 ];
3678
3686
}else {
3679
3687
$ ns =self ::NS_HTML ;
3680
3688
}
3681
- if (! $ el -> hasAttributeNS ( $ ns , $ attr ) ) {
3682
- //XSKETCHY: work around godawful libxml bug
3683
- if ( $ ns === self :: NS_XLINK ) {
3684
- $ el ->setAttribute ( ' xlink: ' . $ attr ,$ kp ['value ' ]);
3685
- } elseif ( $ ns === self :: NS_HTML ) {
3686
- // Another godawful libxml bug
3687
- $ el ->setAttribute ($ attr ,$ kp ['value ' ]);
3688
- } else {
3689
- $ el ->setAttributeNS ( $ ns , $ attr , $ kp [ ' value ' ] );
3689
+ if ($ ns === self :: NS_XML ) {
3690
+ //this is special cased since DOM converts xml:lang
3691
+ // into lang
3692
+ $ el ->setAttributeNS ( $ ns , $ attr ,$ kp ['value ' ]);
3693
+ } else {
3694
+ $ prefix = $ this -> nsToPrefix [ $ ns ];
3695
+ $ el ->setAttribute ($ prefix . $ attr ,$ kp ['value ' ]);
3696
+ if (! isset ( $ el -> html5_namespaced )) {
3697
+ $ el ->html5_namespaced = array ( );
3690
3698
}
3699
+ $ el ->html5_namespaced [$ prefix .$ attr ] =true ;
3691
3700
}
3692
3701
}
3693
3702
}
@@ -3701,6 +3710,14 @@ public function insertForeignElement($token, $namespaceURI) {
3701
3710
* value is not the XLink Namespace, that is a parse error. */
3702
3711
}
3703
3712
3713
/**
 * Clone a node via its own cloneNode() (called without arguments) and
 * carry the html5_namespaced bookkeeping over to the copy.
 *
 * html5_namespaced is the per-element map that insertForeignElement()
 * fills with the prefixed attribute names it has set; it is copied
 * explicitly here so the clone keeps the same record as the source node.
 *
 * @param object $node Node to duplicate; must provide cloneNode().
 *                     NOTE(review): callers visible here pass elements
 *                     from the open-element stack / formatting list.
 * @return object The clone, with html5_namespaced set when the source
 *                node had it set.
 */
private function cloneNode($node) {
    $copy = $node->cloneNode();

    // Nothing to carry over when the source has no namespaced-attribute map.
    if (!isset($node->html5_namespaced)) {
        return $copy;
    }

    $copy->html5_namespaced = $node->html5_namespaced;
    return $copy;
}
3720
+
3704
3721
public function save () {
3705
3722
$ this ->dom ->normalize ();
3706
3723
if (!$ this ->fragment ) {