@@ -158,8 +158,7 @@ public function emitToken($token, $mode = null) {
158
158
159
159
/* A character token that is one of U+0009 CHARACTER TABULATION,
160
160
* U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
161
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
162
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
161
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
163
162
/* Ignore the token. */
164
163
$ this ->ignored =true ;
165
164
}elseif ($ token ['type ' ] === HTML5_Tokenizer::DOCTYPE ) {
@@ -356,8 +355,7 @@ public function emitToken($token, $mode = null) {
356
355
/* A character token that is one of U+0009 CHARACTER TABULATION,
357
356
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
358
357
or U+0020 SPACE */
359
- }elseif ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
360
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
358
+ }elseif ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
361
359
/* Ignore the token. */
362
360
$ this ->ignored =true ;
363
361
@@ -391,8 +389,7 @@ public function emitToken($token, $mode = null) {
391
389
/* A character token that is one of U+0009 CHARACTER TABULATION,
392
390
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
393
391
or U+0020 SPACE */
394
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
395
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
392
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
396
393
/* Ignore the token. */
397
394
$ this ->ignored =true ;
398
395
@@ -465,8 +462,7 @@ public function emitToken($token, $mode = null) {
465
462
/* A character token that is one of U+0009 CHARACTER TABULATION,
466
463
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
467
464
or U+0020 SPACE. */
468
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
469
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
465
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
470
466
/* Insert the character into the current node. */
471
467
$ this ->insertText ($ token ['data ' ]);
472
468
@@ -600,8 +596,7 @@ public function emitToken($token, $mode = null) {
600
596
array_pop ($ this ->stack );
601
597
$ this ->mode =self ::IN_HEAD ;
602
598
}elseif (
603
- ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
604
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) ||
599
+ ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) ||
605
600
($ token ['type ' ] === HTML5_Tokenizer::COMMENT ) ||
606
601
($ token ['type ' ] === HTML5_Tokenizer::STARTTAG && (
607
602
$ token ['name ' ] ==='link ' ||$ token ['name ' ] ==='meta ' ||
@@ -630,8 +625,7 @@ public function emitToken($token, $mode = null) {
630
625
/* A character token that is one of U+0009 CHARACTER TABULATION,
631
626
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
632
627
or U+0020 SPACE */
633
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
634
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
628
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
635
629
/* Append the character to the current node. */
636
630
$ this ->insertText ($ token ['data ' ]);
637
631
@@ -702,6 +696,7 @@ public function emitToken($token, $mode = null) {
702
696
switch ($ token ['type ' ]) {
703
697
/* A character token */
704
698
case HTML5_Tokenizer::CHARACTER :
699
+ case HTML5_Tokenizer::SPACECHARACTER :
705
700
/* Reconstruct the active formatting elements, if any. */
706
701
$ this ->reconstructActiveFormattingElements ();
707
702
@@ -711,7 +706,10 @@ public function emitToken($token, $mode = null) {
711
706
/* If the token is not one of U+0009 CHARACTER TABULATION,
712
707
* U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
713
708
* SPACE, then set the frameset-ok flag to "not ok". */
714
- // YYY: not implemented
709
+ // i.e., if any of the characters is not whitespace
710
+ if (strlen ($ token ['data ' ]) !==strspn ($ token ['data ' ], HTML5_Tokenizer::WHITESPACE )) {
711
+ $ this ->flag_frameset_ok =false ;
712
+ }
715
713
break ;
716
714
717
715
/* A comment token */
@@ -1893,7 +1891,10 @@ public function emitToken($token, $mode = null) {
1893
1891
break ;
1894
1892
1895
1893
case self ::IN_CDATA_RCDATA :
1896
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER ) {
1894
+ if (
1895
+ $ token ['type ' ] === HTML5_Tokenizer::CHARACTER ||
1896
+ $ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER
1897
+ ) {
1897
1898
$ this ->insertText ($ token ['data ' ]);
1898
1899
}elseif ($ token ['type ' ] === HTML5_Tokenizer::EOF ) {
1899
1900
// parse error
@@ -1919,8 +1920,7 @@ public function emitToken($token, $mode = null) {
1919
1920
/* A character token that is one of U+0009 CHARACTER TABULATION,
1920
1921
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1921
1922
or U+0020 SPACE */
1922
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
1923
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ]) &&
1923
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER &&
1924
1924
/* If the current table is tainted, then act as described in
1925
1925
* the "anything else" entry below. */
1926
1926
// Note: hsivonen has a test that fails due to this line
@@ -2142,8 +2142,7 @@ public function emitToken($token, $mode = null) {
2142
2142
/* A character token that is one of U+0009 CHARACTER TABULATION,
2143
2143
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2144
2144
or U+0020 SPACE */
2145
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
2146
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
2145
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
2147
2146
/* Append the character to the current node. */
2148
2147
$ this ->insertText ($ token ['data ' ]);
2149
2148
@@ -2472,7 +2471,10 @@ public function emitToken($token, $mode = null) {
2472
2471
/* Handle the token as follows: */
2473
2472
2474
2473
/* A character token */
2475
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER ) {
2474
+ if (
2475
+ $ token ['type ' ] === HTML5_Tokenizer::CHARACTER ||
2476
+ $ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER
2477
+ ) {
2476
2478
/* Append the token's character to the current node. */
2477
2479
$ this ->insertText ($ token ['data ' ]);
2478
2480
@@ -2669,8 +2671,7 @@ public function emitToken($token, $mode = null) {
2669
2671
/* A character token that is one of U+0009 CHARACTER TABULATION,
2670
2672
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2671
2673
or U+0020 SPACE */
2672
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
2673
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
2674
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
2674
2675
/* Process the token as it would be processed if the insertion mode
2675
2676
was "in body". */
2676
2677
$ this ->processWithRulesFor ($ token ,self ::IN_BODY );
@@ -2717,8 +2718,7 @@ public function emitToken($token, $mode = null) {
2717
2718
/* A character token that is one of U+0009 CHARACTER TABULATION,
2718
2719
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2719
2720
U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
2720
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
2721
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
2721
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
2722
2722
/* Append the character to the current node. */
2723
2723
$ this ->insertText ($ token ['data ' ]);
2724
2724
@@ -2790,8 +2790,7 @@ public function emitToken($token, $mode = null) {
2790
2790
/* A character token that is one of U+0009 CHARACTER TABULATION,
2791
2791
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2792
2792
U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
2793
- if ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
2794
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ])) {
2793
+ if ($ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ) {
2795
2794
/* Append the character to the current node. */
2796
2795
$ this ->insertText ($ token ['data ' ]);
2797
2796
@@ -2836,9 +2835,8 @@ public function emitToken($token, $mode = null) {
2836
2835
$ this ->dom ->appendChild ($ comment );
2837
2836
2838
2837
}elseif ($ token ['type ' ] === HTML5_Tokenizer::DOCTYPE ||
2839
- ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
2840
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ]) ||
2841
- ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' ))) {
2838
+ $ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ||
2839
+ ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' )) {
2842
2840
$ this ->processWithRulesFor ($ token ,self ::IN_BODY );
2843
2841
2844
2842
/* An end-of-file token */
@@ -2860,9 +2858,8 @@ public function emitToken($token, $mode = null) {
2860
2858
$ this ->dom ->appendChild ($ comment );
2861
2859
2862
2860
}elseif ($ token ['type ' ] === HTML5_Tokenizer::DOCTYPE ||
2863
- ($ token ['type ' ] === HTML5_Tokenizer::CHARACTER &&
2864
- preg_match ('/^[\t\n\x0b\x0c ]+$/ ' ,$ token ['data ' ]) ||
2865
- ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' ))) {
2861
+ $ token ['type ' ] === HTML5_Tokenizer::SPACECHARACTER ||
2862
+ ($ token ['type ' ] === HTML5_Tokenizer::STARTTAG &&$ token ['name ' ] ==='html ' )) {
2866
2863
$ this ->processWithRulesFor ($ token ,self ::IN_BODY );
2867
2864
2868
2865
/* An end-of-file token */