@@ -58,6 +58,8 @@ class HTML5_TreeConstructer {
58
58
// code can check for (bool)$ignore_lf_token, but it phases out
59
59
// appropriately)
60
60
private $ ignore_lf_token =0 ;
61
+ private $ fragment =false ;
62
+ private $ root ;
61
63
62
64
// XFOREIGN: SVG's foreignObject is included in scoping
63
65
private $ scoping =array ('applet ' ,'button ' ,'caption ' ,'html ' ,'marquee ' ,'object ' ,'table ' ,'td ' ,'th ' );
@@ -1955,6 +1957,7 @@ public function emitToken($token, $mode = null) {
1955
1957
scope with the same tag name as the token, this is a parse error.
1956
1958
Ignore the token. (fragment case) */
1957
1959
if (!$ this ->elementInScope ($ token ['name ' ],true )) {
1960
+ $ this ->ignored =true ;
1958
1961
// Ignore
1959
1962
1960
1963
/* Otherwise: */
@@ -2999,7 +3002,7 @@ private function clearStackToTableContext($elements) {
2999
3002
}
3000
3003
}
3001
3004
3002
- private function resetInsertionMode () {
3005
+ private function resetInsertionMode ($ context = null ) {
3003
3006
/* 1. Let last be false. */
3004
3007
$ last =false ;
3005
3008
$ leng =count ($ this ->stack );
@@ -3008,16 +3011,16 @@ private function resetInsertionMode() {
3008
3011
/* 2. Let node be the last node in the stack of open elements. */
3009
3012
$ node =$ this ->stack [$ n ];
3010
3013
3011
- /* 3. If node is the first node in the stack of open elements, then
3012
- set last to true. If the element whose innerHTML attribute is being
3013
- set is neither a td element nor a th element, then set node to the
3014
- element whose innerHTML attribute is being set. (innerHTML case) */
3014
+ /* 3. If node is the first node in the stack of open elements, then
3015
+ * set last to true and set node to the context element. (fragment
3016
+ * case) */
3015
3017
if ($ this ->stack [0 ]->isSameNode ($ node )) {
3016
3018
$ last =true ;
3019
+ $ node =$ context ;
3017
3020
}
3018
3021
3019
3022
/* 4. If node is a select element, then switch the insertion mode to
3020
- "in select" and abort these steps. (innerHTML case) */
3023
+ "in select" and abort these steps. (fragment case) */
3021
3024
if ($ node ->tagName ==='select ' ) {
3022
3025
$ this ->mode =self ::IN_SELECT ;
3023
3026
break ;
@@ -3037,7 +3040,7 @@ private function resetInsertionMode() {
3037
3040
/* 7. If node is a tbody, thead, or tfoot element, then switch the
3038
3041
insertion mode to "in table body" and abort these steps. */
3039
3042
}elseif (in_array ($ node ->tagName ,array ('tbody ' ,'thead ' ,'tfoot ' ))) {
3040
- $ this ->mode =self ::IN_TBODY ;
3043
+ $ this ->mode =self ::IN_TABLE_BODY ;
3041
3044
break ;
3042
3045
3043
3046
/* 8. If node is a caption element, then switch the insertion mode
@@ -3049,7 +3052,7 @@ private function resetInsertionMode() {
3049
3052
/* 9. If node is a colgroup element, then switch the insertion mode
3050
3053
to "in column group" and abort these steps. (innerHTML case) */
3051
3054
}elseif ($ node ->tagName ==='colgroup ' ) {
3052
- $ this ->mode =self ::IN_CGROUP ;
3055
+ $ this ->mode =self ::IN_COLUMN_GROUP ;
3053
3056
break ;
3054
3057
3055
3058
/* 10. If node is a table element, then switch the insertion mode
@@ -3058,38 +3061,44 @@ private function resetInsertionMode() {
3058
3061
$ this ->mode =self ::IN_TABLE ;
3059
3062
break ;
3060
3063
3061
- /* 11. If node is a head element, then switch the insertion mode
3064
+ /* 11. If node is an element from the MathML namespace or the SVG
3065
+ * namespace, then switch the insertion mode to "in foreign
3066
+ * content", let the secondary insertion mode be "in body", and
3067
+ * abort these steps. */
3068
+ // XFOREIGN: implement me
3069
+
3070
+ /* 12. If node is a head element, then switch the insertion mode
3062
3071
to "in body" ("in body"! not "in head"!) and abort these steps.
3063
- (innerHTML case) */
3072
+ (fragment case) */
3064
3073
}elseif ($ node ->tagName ==='head ' ) {
3065
3074
$ this ->mode =self ::IN_BODY ;
3066
3075
break ;
3067
3076
3068
- /*12 . If node is a body element, then switch the insertion mode to
3077
+ /*13 . If node is a body element, then switch the insertion mode to
3069
3078
"in body" and abort these steps. */
3070
3079
}elseif ($ node ->tagName ==='body ' ) {
3071
3080
$ this ->mode =self ::IN_BODY ;
3072
3081
break ;
3073
3082
3074
- /*13 . If node is a frameset element, then switch the insertion
3075
- mode to "in frameset" and abort these steps. (innerHTML case) */
3083
+ /*14 . If node is a frameset element, then switch the insertion
3084
+ mode to "in frameset" and abort these steps. (fragment case) */
3076
3085
}elseif ($ node ->tagName ==='frameset ' ) {
3077
- $ this ->mode =self ::IN_FRAME ;
3086
+ $ this ->mode =self ::IN_FRAMESET ;
3078
3087
break ;
3079
3088
3080
- /*14 . If node is an html element, then: if the head element
3089
+ /*15 . If node is an html element, then: if the head element
3081
3090
pointer is null, switch the insertion mode to "before head",
3082
3091
otherwise, switch the insertion mode to "after head". In either
3083
- case, abort these steps. (innerHTML case) */
3092
+ case, abort these steps. (fragment case) */
3084
3093
}elseif ($ node ->tagName ==='html ' ) {
3085
3094
$ this ->mode = ($ this ->head_pointer ===null )
3086
- ?self ::BEFOR_HEAD
3095
+ ?self ::BEFORE_HEAD
3087
3096
:self ::AFTER_HEAD ;
3088
3097
3089
3098
break ;
3090
3099
3091
- /*15 . If last is true, then set the insertion mode to "in body"
3092
- and abort these steps. (innerHTML case) */
3100
+ /*16 . If last is true, then set the insertion mode to "in body"
3101
+ and abort these steps. (fragment case) */
3093
3102
}elseif ($ last ) {
3094
3103
$ this ->mode =self ::IN_BODY ;
3095
3104
break ;
@@ -3237,9 +3246,66 @@ public function currentTableIsTainted() {
3237
3246
return !empty ($ this ->getCurrentTable ()->tainted );
3238
3247
}
3239
3248
3249
/**
 * Sets up the tree constructor for building a fragment.
 *
 * Always flags this instance as being in fragment mode. When a context
 * element name is supplied, additionally performs steps 4.1 - 4.6 of the
 * fragment parsing setup: picks the content model, creates the root
 * html element, seeds the stack of open elements, resets the insertion
 * mode and initialises the form element pointer.
 *
 * @param string|null $context Tag name of the context element, or null
 *                             (or any other falsy value) when the
 *                             fragment is parsed without a context.
 * @return void
 */
public function setupContext($context = null) {
    $this->fragment = true;

    // Without a context there is nothing more to prepare. This check must
    // come BEFORE createElement(): the parameter defaults to null, and
    // asking the DOM to create an element from a null/empty name fails
    // instead of yielding a falsy value.
    if (!$context) {
        return;
    }

    $context = $this->dom->createElement($context);
    // Keep the original post-creation check in case no element came back.
    if (!$context) {
        return;
    }

    /* 4.1. Set the HTML parser's tokenization stage's content model
     * flag according to the context element, as follows: */
    switch ($context->tagName) {
        case 'title': case 'textarea':
            $this->content_model = HTML5_Tokenizer::RCDATA;
            break;
        case 'style': case 'script': case 'xmp': case 'iframe':
        case 'noembed': case 'noframes':
            $this->content_model = HTML5_Tokenizer::CDATA;
            break;
        case 'noscript':
            // XSCRIPT: assuming scripting is enabled
            $this->content_model = HTML5_Tokenizer::CDATA;
            break;
        case 'plaintext':
            $this->content_model = HTML5_Tokenizer::PLAINTEXT;
            break;
        // Any other tag name leaves the content model untouched.
    }

    /* 4.2. Let root be a new html element with no attributes. */
    $root = $this->dom->createElement('html');
    // Remembered so that save() can hand back the root's children.
    $this->root = $root;

    /* 4.3 Append the element root to the Document node created above. */
    $this->dom->appendChild($root);

    /* 4.4 Set up the parser's stack of open elements so that it
     * contains just the single element root. */
    $this->stack = array($root);

    /* 4.5 Reset the parser's insertion mode appropriately. */
    $this->resetInsertionMode($context);

    /* 4.6 Set the parser's form element pointer to the nearest node
     * to the context element that is a form element (going straight up
     * the ancestor chain, and including the element itself, if it is a
     * form element), or, if there is no such form element, to null. */
    $node = $context;
    do {
        if ($node->tagName === 'form') {
            $this->form_pointer = $node;
            break;
        }
        // The loop ends once parentNode yields a falsy value (no parent).
    } while ($node = $node->parentNode);
}
3297
+
3240
3298
3241
3299
/**
 * Returns the result of tree construction.
 *
 * In normal (non-fragment) mode the document held in $this->dom is
 * returned. In fragment mode the child nodes are returned instead:
 * those of the fragment root when one was created, otherwise those of
 * the document itself.
 */
public function save() {
    // Full-document parse: return the document object itself.
    if (!$this->fragment) {
        return $this->dom;
    }

    // Fragment parse: prefer the root element when it exists.
    $container = $this->root ? $this->root : $this->dom;
    return $container->childNodes;
}
3244
3310
}
3245
3311