@@ -93,19 +93,31 @@ class HTML5_TreeConstructer {
93
93
const AFTER_AFTER_BODY =20 ;
94
94
const AFTER_AFTER_FRAMESET =21 ;
95
95
96
/**
 * Converts a magic number to a readable name. Use for debugging.
 *
 * The lookup table is built once, on first call, by reflecting over the
 * constants of this class and flipping the name => value map into a
 * value => name map. Constants that share a value collapse into a single
 * entry (array_flip keeps the last name it sees for a given value).
 *
 * @param int $number Value of one of this class's constants.
 * @return string|null Name of the matching constant, or null when no
 *                     constant carries that value.
 */
private function strConst($number) {
    static $lookup;
    if (!$lookup) {
        // __CLASS__ keeps the reflection target tied to the defining class
        // instead of a hand-typed class-name string.
        $r = new ReflectionClass(__CLASS__);
        $lookup = array_flip($r->getConstants());
    }
    // Only ints and strings are legal array offsets; for anything else, or
    // for a value no constant uses, report "no name" without a PHP notice.
    if ((is_int($number) || is_string($number)) && isset($lookup[$number])) {
        return $lookup[$number];
    }
    return null;
}
107
+
96
108
// The different types of elements.
97
- const SPECIAL =0 ;
98
- const SCOPING =1 ;
99
- const FORMATTING =2 ;
100
- const PHRASING =3 ;
109
+ const SPECIAL =100 ;
110
+ const SCOPING =101 ;
111
+ const FORMATTING =102 ;
112
+ const PHRASING =103 ;
101
113
102
114
// Quirks modes in $quirks_mode
103
- const NO_QUIRKS =0 ;
104
- const QUIRKS_MODE =1 ;
105
- const LIMITED_QUIRKS_MODE =2 ;
115
+ const NO_QUIRKS =200 ;
116
+ const QUIRKS_MODE =201 ;
117
+ const LIMITED_QUIRKS_MODE =202 ;
106
118
107
119
// Marker to be placed in $a_formatting
108
- const MARKER =0 ;
120
+ const MARKER =300 ;
109
121
110
122
public function __construct () {
111
123
$ this ->mode =self ::INITIAL ;
@@ -119,10 +131,21 @@ public function __construct() {
119
131
120
132
// Process tag tokens
121
133
public function emitToken ($ token ,$ mode =null ) {
134
+ // XXX: ignore parse errors... why are we emitting them, again?
135
+ if ($ token ['type ' ] === HTML5_Tokenizer::PARSEERROR )return ;
136
+ if ($ mode ===null )$ mode =$ this ->mode ;
137
+
138
+ /*
139
+ $backtrace = debug_backtrace();
140
+ if ($backtrace[1]['class'] !== 'HTML5_TreeConstructer') echo "--\n";
141
+ echo $this->strConst($mode) . "\n ";
142
+ token_dump($token);
143
+ if ($this->foster_parent) echo " -> this is a foster parent mode\n";
144
+ */
145
+
122
146
if ($ this ->ignore_lf_token )$ this ->ignore_lf_token --;
123
147
$ this ->ignored =false ;
124
148
// indenting is a little wonky, this can be changed later on
125
- if ($ mode ===null )$ mode =$ this ->mode ;
126
149
switch ($ mode ) {
127
150
128
151
case self ::INITIAL :
@@ -164,8 +187,15 @@ public function emitToken($token, $mode = null) {
164
187
// a doctype to DOMDocument. Maybe I haven't chanted the right
165
188
// syllables.
166
189
$ impl =new DOMImplementation ();
167
- $ doctype =$ impl ->createDocumentType ($ token ['name ' ],$ token ['public ' ],$ token ['system ' ]);
168
- $ this ->dom ->appendChild ($ doctype );
190
+ // This call can fail for particularly pathological cases (namely,
191
+ // when the qualifiedName parameter ($token['name']) is missing).
192
+ if ($ token ['name ' ]) {
193
+ $ doctype =$ impl ->createDocumentType ($ token ['name ' ],$ token ['public ' ],$ token ['system ' ]);
194
+ $ this ->dom ->appendChild ($ doctype );
195
+ }else {
196
+ // It looks like libxml's not actually *able* to express this case.
197
+ // So... don't. XXX
198
+ }
169
199
// XQUIRKS: Implement quirks mode
170
200
$ this ->mode =self ::BEFORE_HTML ;
171
201
}else {
@@ -828,10 +858,19 @@ public function emitToken($token, $mode = null) {
828
858
break ;
829
859
830
860
}elseif ($ this ->a_formatting [$ n ]->tagName ==='a ' ) {
861
+ $ a =$ this ->a_formatting [$ n ];
831
862
$ this ->emitToken (array (
832
863
'name ' =>'a ' ,
833
864
'type ' => HTML5_Tokenizer::ENDTAG
834
865
));
866
+ if (in_array ($ a ,$ this ->a_formatting )) {
867
+ $ a_i =array_search ($ a ,$ this ->a_formatting ,true );
868
+ if ($ a_i !==false )array_splice ($ this ->a_formatting ,$ a_i ,1 );
869
+ }
870
+ if (in_array ($ a ,$ this ->stack )) {
871
+ $ a_i =array_search ($ a ,$ this ->stack ,true );
872
+ if ($ a_i !==false )array_splice ($ this ->stack ,$ a_i ,1 );
873
+ }
835
874
break ;
836
875
}
837
876
}
@@ -1326,7 +1365,7 @@ public function emitToken($token, $mode = null) {
1326
1365
// parse error
1327
1366
}
1328
1367
/* 3. Remove node from the stack of open elements. */
1329
- array_splice ($ this ->stack ,array_search ($ node ,$ this ->stack ),1 );
1368
+ array_splice ($ this ->stack ,array_search ($ node ,$ this ->stack , true ),1 );
1330
1369
}
1331
1370
1332
1371
break ;
@@ -1896,9 +1935,10 @@ public function emitToken($token, $mode = null) {
1896
1935
node, it must instead be inserted into the foster parent element. */
1897
1936
if (in_array (end ($ this ->stack )->tagName ,
1898
1937
array ('table ' ,'tbody ' ,'tfoot ' ,'thead ' ,'tr ' ))) {
1938
+ $ old =$ this ->foster_parent ;
1899
1939
$ this ->foster_parent =true ;
1900
1940
$ this ->processWithRulesFor ($ token ,self ::IN_BODY );
1901
- $ this ->foster_parent =false ;
1941
+ $ this ->foster_parent =$ old ;
1902
1942
}else {
1903
1943
$ this ->processWithRulesFor ($ token ,self ::IN_BODY );
1904
1944
}
@@ -2753,7 +2793,7 @@ private function appendToRealParent($node) {
2753
2793
}
2754
2794
2755
2795
private function appendChild ($ parent ,$ node ) {
2756
- if ($ nodeinstanceof DOMCharacterData &&$ parent ->lastChild instanceof DOMCharacterData ) {
2796
+ if ($ nodeinstanceof DOMText &&$ parent ->lastChild instanceof DOMText ) {
2757
2797
// attach text to previous node
2758
2798
$ parent ->lastChild ->data .=$ node ->data ;
2759
2799
}else {
@@ -2762,11 +2802,11 @@ private function appendChild($parent, $node) {
2762
2802
}
2763
2803
2764
2804
private function insertBefore ($ parent ,$ node ,$ marker ) {
2765
- if ($ nodeinstanceof DOMCharacterData ) {
2766
- if ($ markerinstanceof DOMCharacterData ) {
2805
+ if ($ nodeinstanceof DOMText ) {
2806
+ if ($ markerinstanceof DOMText ) {
2767
2807
$ marker ->data =$ node ->data .$ marker ->data ;
2768
2808
return ;
2769
- }elseif ($ marker ->previousSibling &&$ marker ->previousSibling instanceof DOMCharacterData ) {
2809
+ }elseif ($ marker ->previousSibling &&$ marker ->previousSibling instanceof DOMText ) {
2770
2810
$ marker ->previousSibling ->data .=$ node ->data ;
2771
2811
return ;
2772
2812
}
@@ -3162,6 +3202,16 @@ public function fosterParent($node) {
3162
3202
}
3163
3203
}
3164
3204
3205
/**
 * Debugging aid: echoes the contents of $this->stack, one entry per
 * line, each prefixed with its 1-based position and showing the
 * entry's tagName.
 */
private function printStack() {
    echo " Stack: \n";
    foreach ($this->stack as $index => $node) {
        // Array keys are 0-based; display positions starting from 1.
        $position = $index + 1;
        $line = " " . $position . ". " . $node->tagName . "\n";
        echo $line;
    }
}
3214
+
3165
3215
/**
 * Reports whether the node returned by getCurrentTable() carries a
 * non-empty `tainted` property.
 *
 * @return bool True when `tainted` is set and truthy, false otherwise
 *              (including when the property does not exist).
 */
public function currentTableIsTainted() {
    $table = $this->getCurrentTable();
    if (empty($table->tainted)) {
        return false;
    }
    return true;
}