36
36
37
37
class HTML5_TreeConstructer {
38
38
public $ stack =array ();
39
+ public $ content_model ;
39
40
40
41
private $ mode ;
41
42
private $ original_mode ;
@@ -138,7 +139,9 @@ public function emitToken($token, $mode = null) {
138
139
/*
139
140
$backtrace = debug_backtrace();
140
141
if ($backtrace[1]['class'] !== 'HTML5_TreeConstructer') echo "--\n";
141
- echo $this->strConst($mode) . "\n ";
142
+ echo $this->strConst($mode);
143
+ if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
144
+ echo "\n ";
142
145
token_dump($token);
143
146
$this->printStack();
144
147
$this->printActiveFormattingElements();
@@ -206,7 +209,7 @@ public function emitToken($token, $mode = null) {
206
209
/* Switch the insertion mode to "before html", then reprocess the
207
210
* current token. */
208
211
$ this ->mode =self ::BEFORE_HTML ;
209
- return $ this ->emitToken ($ token );
212
+ $ this ->emitToken ($ token );
210
213
}
211
214
break ;
212
215
@@ -253,7 +256,7 @@ public function emitToken($token, $mode = null) {
253
256
/* Switch the insertion mode to "before head", then reprocess the
254
257
* current token. */
255
258
$ this ->mode =self ::BEFORE_HEAD ;
256
- return $ this ->emitToken ($ token );
259
+ $ this ->emitToken ($ token );
257
260
}
258
261
break ;
259
262
@@ -283,7 +286,7 @@ public function emitToken($token, $mode = null) {
283
286
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' ) {
284
287
/* Process the token using the rules for the "in body"
285
288
* insertion mode. */
286
- return $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
289
+ $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
287
290
288
291
/* A start tag token with the tag name "head" */
289
292
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='head ' ) {
@@ -309,7 +312,7 @@ public function emitToken($token, $mode = null) {
309
312
'type ' => HTML5_Tokenizer::STARTTAG ,
310
313
'attr ' =>array ()
311
314
));
312
- return $ this ->emitToken ($ token );
315
+ $ this ->emitToken ($ token );
313
316
314
317
/* Any other end tag */
315
318
}elseif ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG ) {
@@ -327,7 +330,7 @@ public function emitToken($token, $mode = null) {
327
330
'type ' => HTML5_Tokenizer::STARTTAG ,
328
331
'attr ' =>array ()
329
332
));
330
- return $ this ->emitToken ($ token );
333
+ $ this ->emitToken ($ token );
331
334
}
332
335
break ;
333
336
@@ -356,7 +359,7 @@ public function emitToken($token, $mode = null) {
356
359
/* A start tag whose tag name is "html" */
357
360
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&
358
361
$ token ['name ' ] ==='html ' ) {
359
- return $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
362
+ $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
360
363
361
364
/* A start tag whose tag name is one of: "base", "command", "link" */
362
365
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&
@@ -391,14 +394,14 @@ public function emitToken($token, $mode = null) {
391
394
392
395
/* A start tag with the tag name "title" */
393
396
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='title ' ) {
394
- return $ this ->insertRCDATAElement ($ token );
397
+ $ this ->insertRCDATAElement ($ token );
395
398
396
399
/* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
397
400
* A start tag whose tag name is one of: "noframes", "style" */
398
401
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&
399
402
($ token ['name ' ] ==='noscript ' ||$ token ['name ' ] ==='noframes ' ||$ token ['name ' ] ==='style ' )) {
400
403
// XSCRIPT: Scripting flag not respected
401
- return $ this ->insertCDATAElement ($ token );
404
+ $ this ->insertCDATAElement ($ token );
402
405
403
406
// XSCRIPT: Scripting flag disable not implemented
404
407
@@ -426,7 +429,7 @@ public function emitToken($token, $mode = null) {
426
429
/* 7. Switch the insertion mode to "in CDATA/RCDATA" */
427
430
$ this ->mode =self ::IN_CDATA_RCDATA ;
428
431
/* 5. Switch the tokeniser's content model flag to the CDATA state. */
429
- return HTML5_Tokenizer::CDATA ;
432
+ $ this -> content_model = HTML5_Tokenizer::CDATA ;
430
433
431
434
/* An end tag with the tag name "head" */
432
435
}elseif ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG &&$ token ['name ' ] ==='head ' ) {
@@ -455,15 +458,15 @@ public function emitToken($token, $mode = null) {
455
458
));
456
459
457
460
/* Then, reprocess the current token. */
458
- return $ this ->emitToken ($ token );
461
+ $ this ->emitToken ($ token );
459
462
}
460
463
break ;
461
464
462
465
case self ::IN_HEAD_NOSCRIPT :
463
466
if ($ token ['type ' ] === HTML5_Tokenizer::DOCTYPE ) {
464
467
// parse error
465
468
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' ) {
466
- return $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
469
+ $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
467
470
}elseif ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG &&$ token ['name ' ] ==='noscript ' ) {
468
471
/* Pop the current node (which will be a noscript element) from the
469
472
* stack of open elements; the new current node will be a head
@@ -477,7 +480,7 @@ public function emitToken($token, $mode = null) {
477
480
($ token ['type ' ] === HTML5_Tokenizer::STARTTAG && (
478
481
$ token ['name ' ] ==='link ' ||$ token ['name ' ] ==='meta ' ||
479
482
$ token ['name ' ] ==='noframes ' ||$ token ['name ' ] ==='style ' ))) {
480
- return $ this ->processWithRulesFor ($ token ,self ::IN_HEAD );
483
+ $ this ->processWithRulesFor ($ token ,self ::IN_HEAD );
481
484
// inverted logic
482
485
}elseif (
483
486
($ token ['type ' ] === HTML5_Tokenizer::STARTTAG && (
@@ -491,7 +494,7 @@ public function emitToken($token, $mode = null) {
491
494
'type ' => HTML5_Tokenizer::ENDTAG ,
492
495
'name ' =>'noscript ' ,
493
496
));
494
- return $ this ->emitToken ($ token );
497
+ $ this ->emitToken ($ token );
495
498
}
496
499
break ;
497
500
@@ -516,7 +519,7 @@ public function emitToken($token, $mode = null) {
516
519
// parse error
517
520
518
521
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' ) {
519
- return $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
522
+ $ this ->processWithRulesFor ($ token ,self ::IN_BODY );
520
523
521
524
/* A start tag token with the tag name "body" */
522
525
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='body ' ) {
@@ -544,9 +547,8 @@ public function emitToken($token, $mode = null) {
544
547
/* Push the node pointed to by the head element pointer onto the
545
548
* stack of open elements. */
546
549
$ this ->stack [] =$ this ->head_pointer ;
547
- $ out = $ this ->processWithRulesFor ($ token ,self ::IN_HEAD );
550
+ $ this ->processWithRulesFor ($ token ,self ::IN_HEAD );
548
551
array_pop ($ this ->stack );
549
- return $ out ;
550
552
551
553
// inversion of specification
552
554
}elseif (
@@ -564,7 +566,7 @@ public function emitToken($token, $mode = null) {
564
566
'attr ' =>array ()
565
567
));
566
568
$ this ->flag_frameset_ok =true ;
567
- return $ this ->emitToken ($ token );
569
+ $ this ->emitToken ($ token );
568
570
}
569
571
break ;
570
572
@@ -616,7 +618,7 @@ public function emitToken($token, $mode = null) {
616
618
case 'script ' :case 'style ' :case 'title ' :
617
619
/* Process the token as if the insertion mode had been "in
618
620
head". */
619
- return $ this ->processWithRulesFor ($ token ,self ::IN_HEAD );
621
+ $ this ->processWithRulesFor ($ token ,self ::IN_HEAD );
620
622
break ;
621
623
622
624
/* A start tag token with the tag name "body" */
@@ -839,7 +841,7 @@ public function emitToken($token, $mode = null) {
839
841
/* Insert an HTML element for the token. */
840
842
$ this ->insertElement ($ token );
841
843
842
- return HTML5_Tokenizer::PLAINTEXT ;
844
+ $ this -> content_model = HTML5_Tokenizer::PLAINTEXT ;
843
845
break ;
844
846
845
847
// more diversions
@@ -1049,7 +1051,7 @@ public function emitToken($token, $mode = null) {
1049
1051
/* Parse error. Change the token's tag name to "img" and
1050
1052
reprocess it. (Don't ask.) */
1051
1053
$ token ['name ' ] ='img ' ;
1052
- return $ this ->emitToken ($ token );
1054
+ $ this ->emitToken ($ token );
1053
1055
break ;
1054
1056
1055
1057
/* A start tag whose tag name is "isindex" */
@@ -1178,7 +1180,7 @@ public function emitToken($token, $mode = null) {
1178
1180
1179
1181
/* Switch the tokeniser's content model flag to the
1180
1182
RCDATA state. */
1181
- return HTML5_Tokenizer::RCDATA ;
1183
+ $ this -> content_model = HTML5_Tokenizer::RCDATA ;
1182
1184
break ;
1183
1185
1184
1186
/* A start tag token whose tag name is "xmp" */
@@ -1188,16 +1190,17 @@ public function emitToken($token, $mode = null) {
1188
1190
1189
1191
$ this ->flag_frameset_ok =false ;
1190
1192
1191
- return $ this ->insertCDATAElement ($ token );
1193
+ $ this ->insertCDATAElement ($ token );
1192
1194
break ;
1193
1195
1194
1196
case 'iframe ' :
1195
1197
$ this ->flag_frameset_ok =false ;
1196
- return $ this ->insertCDATAElement ($ token );
1198
+ $ this ->insertCDATAElement ($ token );
1199
+ break ;
1197
1200
1198
1201
case 'noembed ' :case 'noscript ' :
1199
1202
// XSCRIPT: should check scripting flag
1200
- return $ this ->insertCDATAElement ($ token );
1203
+ $ this ->insertCDATAElement ($ token );
1201
1204
break ;
1202
1205
1203
1206
/* A start tag whose tag name is "select" */
@@ -1319,7 +1322,7 @@ public function emitToken($token, $mode = null) {
1319
1322
'type ' => HTML5_Tokenizer::ENDTAG
1320
1323
));
1321
1324
1322
- if (!$ this ->ignored )return $ this ->emitToken ($ token );
1325
+ if (!$ this ->ignored )$ this ->emitToken ($ token );
1323
1326
break ;
1324
1327
1325
1328
case 'address ' :case 'article ' :case 'aside ' :case 'blockquote ' :
@@ -1844,7 +1847,7 @@ public function emitToken($token, $mode = null) {
1844
1847
'attr ' =>array ()
1845
1848
));
1846
1849
1847
- return $ this ->emitToken ($ token );
1850
+ $ this ->emitToken ($ token );
1848
1851
1849
1852
/* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
1850
1853
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&in_array ($ token ['name ' ],
@@ -1868,7 +1871,7 @@ public function emitToken($token, $mode = null) {
1868
1871
'attr ' =>array ()
1869
1872
));
1870
1873
1871
- return $ this ->emitToken ($ token );
1874
+ $ this ->emitToken ($ token );
1872
1875
1873
1876
/* A start tag whose tag name is "table" */
1874
1877
}elseif ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&
@@ -1881,7 +1884,7 @@ public function emitToken($token, $mode = null) {
1881
1884
'type ' => HTML5_Tokenizer::ENDTAG
1882
1885
));
1883
1886
1884
- if (!$ this ->ignored )return $ this ->emitToken ($ token );
1887
+ if (!$ this ->ignored )$ this ->emitToken ($ token );
1885
1888
1886
1889
/* An end tag whose tag name is "table" */
1887
1890
}elseif ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG &&
@@ -1991,7 +1994,7 @@ public function emitToken($token, $mode = null) {
1991
1994
'type ' => HTML5_Tokenizer::ENDTAG
1992
1995
));
1993
1996
1994
- if (!$ this ->ignored )return $ this ->emitToken ($ token );
1997
+ if (!$ this ->ignored )$ this ->emitToken ($ token );
1995
1998
1996
1999
/* An end tag whose tag name is one of: "body", "col", "colgroup",
1997
2000
"html", "tbody", "td", "tfoot", "th", "thead", "tr" */
@@ -2072,7 +2075,7 @@ public function emitToken($token, $mode = null) {
2072
2075
'type ' => HTML5_Tokenizer::ENDTAG
2073
2076
));
2074
2077
2075
- if (!$ this ->ignored )return $ this ->emitToken ($ token );
2078
+ if (!$ this ->ignored )$ this ->emitToken ($ token );
2076
2079
}
2077
2080
break ;
2078
2081
@@ -2100,7 +2103,7 @@ public function emitToken($token, $mode = null) {
2100
2103
'attr ' =>array ()
2101
2104
));
2102
2105
2103
- return $ this ->emitToken ($ token );
2106
+ $ this ->emitToken ($ token );
2104
2107
2105
2108
/* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2106
2109
}elseif ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG &&
@@ -2148,7 +2151,7 @@ public function emitToken($token, $mode = null) {
2148
2151
'type ' => HTML5_Tokenizer::ENDTAG
2149
2152
));
2150
2153
2151
- return $ this ->emitToken ($ token );
2154
+ $ this ->emitToken ($ token );
2152
2155
}
2153
2156
2154
2157
/* An end tag whose tag name is one of: "body", "caption", "col",
@@ -2215,7 +2218,7 @@ public function emitToken($token, $mode = null) {
2215
2218
'name ' =>'tr ' ,
2216
2219
'type ' => HTML5_Tokenizer::ENDTAG
2217
2220
));
2218
- if (!$ this ->ignored )return $ this ->emitToken ($ token );
2221
+ if (!$ this ->ignored )$ this ->emitToken ($ token );
2219
2222
2220
2223
/* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2221
2224
}elseif ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG &&
@@ -2235,7 +2238,7 @@ public function emitToken($token, $mode = null) {
2235
2238
'type ' => HTML5_Tokenizer::ENDTAG
2236
2239
));
2237
2240
2238
- return $ this ->emitToken ($ token );
2241
+ $ this ->emitToken ($ token );
2239
2242
}
2240
2243
2241
2244
/* An end tag whose tag name is one of: "body", "caption", "col",
@@ -2303,7 +2306,7 @@ public function emitToken($token, $mode = null) {
2303
2306
token. */
2304
2307
}else {
2305
2308
$ this ->closeCell ();
2306
- return $ this ->emitToken ($ token );
2309
+ $ this ->emitToken ($ token );
2307
2310
}
2308
2311
2309
2312
/* An end tag whose tag name is one of: "body", "caption", "col",
@@ -2328,7 +2331,7 @@ public function emitToken($token, $mode = null) {
2328
2331
token. */
2329
2332
}else {
2330
2333
$ this ->closeCell ();
2331
- return $ this ->emitToken ($ token );
2334
+ $ this ->emitToken ($ token );
2332
2335
}
2333
2336
2334
2337
/* Anything else */
@@ -2577,7 +2580,7 @@ public function emitToken($token, $mode = null) {
2577
2580
/* Parse error. Set the insertion mode to "in body" and reprocess
2578
2581
the token. */
2579
2582
$ this ->mode =self ::IN_BODY ;
2580
- return $ this ->emitToken ($ token );
2583
+ $ this ->emitToken ($ token );
2581
2584
}
2582
2585
break ;
2583
2586
@@ -3121,14 +3124,14 @@ private function insertCDATAElement($token) {
3121
3124
$ this ->insertElement ($ token );
3122
3125
$ this ->original_mode =$ this ->mode ;
3123
3126
$ this ->mode =self ::IN_CDATA_RCDATA ;
3124
- return HTML5_Tokenizer::CDATA ;
3127
+ $ this -> content_model = HTML5_Tokenizer::CDATA ;
3125
3128
}
3126
3129
3127
3130
private function insertRCDATAElement ($ token ) {
3128
3131
$ this ->insertElement ($ token );
3129
3132
$ this ->original_mode =$ this ->mode ;
3130
3133
$ this ->mode =self ::IN_CDATA_RCDATA ;
3131
- return HTML5_Tokenizer::RCDATA ;
3134
+ $ this -> content_model = HTML5_Tokenizer::RCDATA ;
3132
3135
}
3133
3136
3134
3137
private function getAttr ($ token ,$ key ) {
@@ -3137,7 +3140,6 @@ private function getAttr($token, $key) {
3137
3140
foreach ($ token ['attr ' ]as $ keypair ) {
3138
3141
if ($ keypair ['name ' ] ===$ key )$ ret =$ keypair ['value ' ];
3139
3142
}
3140
- var_dump ($ ret );
3141
3143
return $ ret ;
3142
3144
}
3143
3145