@@ -70,6 +70,9 @@ class HTML5_TreeBuilder {
70
70
'p ' ,'param ' ,'plaintext ' ,'pre ' ,'script ' ,'select ' ,'spacer ' ,'style ' ,
71
71
'tbody ' ,'textarea ' ,'tfoot ' ,'thead ' ,'title ' ,'tr ' ,'ul ' ,'wbr ' );
72
72
73
+ private $ pendingTableCharacters ;
74
+ private $ pendingTableCharactersDirty ;
75
+
73
76
// Tree construction modes
74
77
const INITIAL =0 ;
75
78
const BEFORE_HTML =1 ;
@@ -80,19 +83,20 @@ class HTML5_TreeBuilder {
80
83
const IN_BODY =6 ;
81
84
const IN_CDATA_RCDATA =7 ;
82
85
const IN_TABLE =8 ;
83
- const IN_CAPTION =9 ;
84
- const IN_COLUMN_GROUP =10 ;
85
- const IN_TABLE_BODY =11 ;
86
- const IN_ROW =12 ;
87
- const IN_CELL =13 ;
88
- const IN_SELECT =14 ;
89
- const IN_SELECT_IN_TABLE =15 ;
90
- const IN_FOREIGN_CONTENT =16 ;
91
- const AFTER_BODY =17 ;
92
- const IN_FRAMESET =18 ;
93
- const AFTER_FRAMESET =19 ;
94
- const AFTER_AFTER_BODY =20 ;
95
- const AFTER_AFTER_FRAMESET =21 ;
86
+ const IN_TABLE_TEXT =9 ;
87
+ const IN_CAPTION =10 ;
88
+ const IN_COLUMN_GROUP =11 ;
89
+ const IN_TABLE_BODY =12 ;
90
+ const IN_ROW =13 ;
91
+ const IN_CELL =14 ;
92
+ const IN_SELECT =15 ;
93
+ const IN_SELECT_IN_TABLE =16 ;
94
+ const IN_FOREIGN_CONTENT =17 ;
95
+ const AFTER_BODY =18 ;
96
+ const IN_FRAMESET =19 ;
97
+ const AFTER_FRAMESET =20 ;
98
+ const AFTER_AFTER_BODY =21 ;
99
+ const AFTER_AFTER_FRAMESET =22 ;
96
100
97
101
/**
98
102
* Converts a magic number to a readable name. Use for debugging.
@@ -1940,17 +1944,21 @@ public function emitToken($token, $mode = null) {
1940
1944
case self ::IN_TABLE :
1941
1945
$ clear =array ('html ' ,'table ' );
1942
1946
1943
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1944
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1945
- or U+0020 SPACE */
1946
- if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER &&
1947
- /* If the current table is tainted, then act as described in
1948
- * the "anything else" entry below. */
1949
- // Note: hsivonen has a test that fails due to this line
1950
- // because he wants to convince Hixie not to do taint
1951
- !$ this ->currentTableIsTainted ()) {
1952
- /* Append the character to the current node. */
1953
- $ this ->insertText ($ token ['data ' ]);
1947
+ /* A character token */
1948
+ if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER ||
1949
+ $ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
1950
+ /* Let the pending table character tokens
1951
+ * be an empty list of tokens. */
1952
+ $ this ->pendingTableCharacters ="" ;
1953
+ $ this ->pendingTableCharactersDirty =false ;
1954
+ /* Let the original insertion mode be the current
1955
+ * insertion mode. */
1956
+ $ this ->original_mode =$ this ->mode ;
1957
+ /* Switch the insertion mode to
1958
+ * "in table text" and
1959
+ * reprocess the token. */
1960
+ $ this ->mode =self ::IN_TABLE_TEXT ;
1961
+ $ this ->emitToken ($ token );
1954
1962
1955
1963
/* A comment token */
1956
1964
}elseif ($ token ['type ' ] === HTML5_Tokenizer::COMMENT ) {
@@ -2096,6 +2104,57 @@ public function emitToken($token, $mode = null) {
2096
2104
}
2097
2105
break ;
2098
2106
2107
case self::IN_TABLE_TEXT:
    /* "In table text" insertion mode.
     *
     * This mode is entered from the "in table" mode, which first resets
     * $this->pendingTableCharacters to an empty string, clears
     * $this->pendingTableCharactersDirty and records the mode to return
     * to in $this->original_mode. While in this mode, character data is
     * buffered; the first non-character token flushes the buffer and is
     * then reprocessed in the original mode. */

    /* A character token */
    if ($token['type'] === HTML5_Tokenizer::CHARACTER) {
        /* Append the character token to the pending table
         * character tokens list. */
        $this->pendingTableCharacters .= $token['data'];
        // A CHARACTER token (as opposed to SPACECHARACTER) marks the
        // buffer as dirty, so it is foster parented when flushed.
        $this->pendingTableCharactersDirty = true;
    } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
        // SPACECHARACTER tokens are buffered without marking it dirty.
        $this->pendingTableCharacters .= $token['data'];
    /* Anything else */
    } else {
        if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
            /* If any of the tokens in the pending table character tokens list
             * are character tokens that are not one of U+0009 CHARACTER
             * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
             * U+0020 SPACE, then reprocess those character tokens using the
             * rules given in the "anything else" entry in the "in table"
             * insertion mode. */
            if ($this->pendingTableCharactersDirty) {
                /* Parse error. Process the token using the rules for the
                 * "in body" insertion mode, except that if the current
                 * node is a table, tbody, tfoot, thead, or tr element,
                 * then, whenever a node would be inserted into the current
                 * node, it must instead be foster parented. */
                // XERROR
                // Force foster parenting only for the duration of this
                // call, then restore the previous setting.
                $old = $this->foster_parent;
                $this->foster_parent = true;
                $text_token = array(
                    'type' => HTML5_Tokenizer::CHARACTER,
                    'data' => $this->pendingTableCharacters,
                );
                $this->processWithRulesFor($text_token, self::IN_BODY);
                $this->foster_parent = $old;

            /* Otherwise, insert the characters given by the pending table
             * character tokens list into the current node. */
            } else {
                $this->insertText($this->pendingTableCharacters);
            }
            // Clear both pieces of buffered state. The flag must be the
            // declared pendingTableCharactersDirty property: assigning to
            // a misspelled name would leave the flag set and create an
            // undeclared dynamic property.
            $this->pendingTableCharacters = null;
            $this->pendingTableCharactersDirty = null;
        }

        /* Switch the insertion mode to the original insertion mode and
         * reprocess the token.
         */
        $this->mode = $this->original_mode;
        $this->emitToken($token);
    }
    break;
2157
+
2099
2158
case self ::IN_CAPTION :
2100
2159
/* An end tag whose tag name is "caption" */
2101
2160
if ($ token ['type ' ] === HTML5_Tokenizer::ENDTAG &&$ token ['name ' ] ==='caption ' ) {
@@ -3458,12 +3517,8 @@ private function getFosterParent() {
3458
3517
public function fosterParent ($ node ) {
3459
3518
$ foster_parent =$ this ->getFosterParent ();
3460
3519
$ table =$ this ->getCurrentTable ();// almost equivalent to last table element, except it can be html
3461
- /* When a node node is to be foster parented, the node node must be
3462
- * inserted into the foster parent element, and the current table must
3463
- * be marked as tainted. (Once the current table has been tainted,
3464
- * whitespace characters are inserted into the foster parent element
3465
- * instead of the current node.) */
3466
- $ table ->tainted =true ;
3520
+ /* When a node node is to be foster parented, the node node must be
3521
+ * inserted into the foster parent element. */
3467
3522
/* If the foster parent element is the parent element of the last table
3468
3523
* element in the stack of open elements, then node must be inserted
3469
3524
* immediately before the last table element in the stack of open