@@ -44,13 +44,6 @@ class HTML5_Tokenizer {
44
44
*/
45
45
private $ tree ;
46
46
47
- /**
48
- * Escape flag as specified by the HTML5 specification: "used to
49
- * control the behavior of the tokeniser. It is either true or
50
- * false, and initially must be set to the false state."
51
- */
52
- private $ escape =false ;
53
-
54
47
/**
55
48
* Current content model we are parsing as.
56
49
*/
@@ -111,8 +104,16 @@ public function parseFragment($context = null) {
111
104
* Performs the actual parsing of the document.
112
105
*/
113
106
public function parse () {
107
+ // Current state
114
108
$ state ='data ' ;
109
+ // This is used to avoid having to have look-behind in the data state.
115
110
$ lastFourChars ='' ;
111
+ /**
112
+ * Escape flag as specified by the HTML5 specification: "used to
113
+ * control the behavior of the tokeniser. It is either true or
114
+ * false, and initially must be set to the false state."
115
+ */
116
+ $ escape =false ;
116
117
//echo "\n\n";
117
118
while ($ state !==null ) {
118
119
/*
@@ -123,7 +124,7 @@ public function parse() {
123
124
case self::CDATA: echo 'CDATA'; break;
124
125
case self::PLAINTEXT: echo 'PLAINTEXT'; break;
125
126
}
126
- if ($this-> escape) echo " escape";
127
+ if ($escape) echo " escape";
127
128
echo "\n";
128
129
*/
129
130
switch ($ state ) {
@@ -139,7 +140,7 @@ public function parse() {
139
140
140
141
// see below for meaning
141
142
$ amp_cond =
142
- !$ this -> escape &&
143
+ !$ escape &&
143
144
(
144
145
$ this ->content_model ===self ::PCDATA ||
145
146
$ this ->content_model ===self ::RCDATA
@@ -151,7 +152,7 @@ public function parse() {
151
152
$ this ->content_model ===self ::RCDATA ||
152
153
$ this ->content_model ===self ::CDATA
153
154
) &&
154
- !$ this -> escape
155
+ !$ escape
155
156
);
156
157
157
158
if ($ char ==='& ' &&$ amp_cond ) {
@@ -168,7 +169,7 @@ public function parse() {
168
169
$ this ->content_model ===self ::RCDATA ||
169
170
$ this ->content_model ===self ::CDATA
170
171
) &&
171
- $ this -> escape ===false &&
172
+ $ escape ===false &&
172
173
$ lastFourChars ==='<!-- '
173
174
) {
174
175
/*
@@ -179,7 +180,7 @@ public function parse() {
179
180
last four characters in the input stream, including this one, are
180
181
U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
181
182
and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
182
- $ this -> escape =true ;
183
+ $ escape =true ;
183
184
184
185
/* In any case, emit the input character as a character token. Stay
185
186
in the data state. */
@@ -208,15 +209,15 @@ public function parse() {
208
209
$ this ->content_model ===self ::RCDATA ||
209
210
$ this ->content_model ===self ::CDATA
210
211
) &&
211
- $ this -> escape ===true &&
212
+ $ escape ===true &&
212
213
substr ($ lastFourChars ,1 ) ==='--> '
213
214
) {
214
215
/* If the content model flag is set to either the RCDATA state or
215
216
the CDATA state, and the escape flag is true, and the last three
216
217
characters in the input stream including this one are U+002D
217
218
HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
218
219
set the escape flag to false. */
219
- $ this -> escape =false ;
220
+ $ escape =false ;
220
221
221
222
/* In any case, emit the input character as a character token.
222
223
Stay in the data state. */