Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit576eeef

Browse files
committed
Merge in changes from default (for clean re-merge).
--HG--branch : numeric-entities
2 parents18e4601 +901876f commit576eeef

File tree

4 files changed

+119
-37
lines changed

4 files changed

+119
-37
lines changed

‎SPEC

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1-
3354
1+
3382
2+
3+
This is the last revision of the spec this library has been audited against.
4+
5+
Excluding: 3374
26

3-
(this is the last revision of the spec this library has been audited against)

‎library/HTML5/TreeBuilder.php

Lines changed: 110 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
// XERROR - with regards to parse errors
3232
// XSCRIPT - with regards to scripting mode
3333
// XENCODING - with regards to encoding (for reparsing tests)
34+
// XDOM - DOM specific code (tagName is explicitly not marked).
35+
// this is not (yet) in helper functions.
3436

3537
class HTML5_TreeBuilder {
3638
public$stack =array();
@@ -70,6 +72,9 @@ class HTML5_TreeBuilder {
7072
'p','param','plaintext','pre','script','select','spacer','style',
7173
'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
7274

75+
private$pendingTableCharacters;
76+
private$pendingTableCharactersDirty;
77+
7378
// Tree construction modes
7479
constINITIAL =0;
7580
constBEFORE_HTML =1;
@@ -80,19 +85,20 @@ class HTML5_TreeBuilder {
8085
constIN_BODY =6;
8186
constIN_CDATA_RCDATA =7;
8287
constIN_TABLE =8;
83-
constIN_CAPTION =9;
84-
constIN_COLUMN_GROUP =10;
85-
constIN_TABLE_BODY =11;
86-
constIN_ROW =12;
87-
constIN_CELL =13;
88-
constIN_SELECT =14;
89-
constIN_SELECT_IN_TABLE=15;
90-
constIN_FOREIGN_CONTENT=16;
91-
constAFTER_BODY =17;
92-
constIN_FRAMESET =18;
93-
constAFTER_FRAMESET =19;
94-
constAFTER_AFTER_BODY =20;
95-
constAFTER_AFTER_FRAMESET =21;
88+
constIN_TABLE_TEXT =9;
89+
constIN_CAPTION =10;
90+
constIN_COLUMN_GROUP =11;
91+
constIN_TABLE_BODY =12;
92+
constIN_ROW =13;
93+
constIN_CELL =14;
94+
constIN_SELECT =15;
95+
constIN_SELECT_IN_TABLE=16;
96+
constIN_FOREIGN_CONTENT=17;
97+
constAFTER_BODY =18;
98+
constIN_FRAMESET =19;
99+
constAFTER_FRAMESET =20;
100+
constAFTER_AFTER_BODY =21;
101+
constAFTER_AFTER_FRAMESET =22;
96102

97103
/**
98104
* Converts a magic number to a readable name. Use for debugging.
@@ -201,6 +207,7 @@ public function emitToken($token, $mode = null) {
201207
* doctype attribute of the Document object. */
202208
if (!isset($token['public']))$token['public'] =null;
203209
if (!isset($token['system']))$token['system'] =null;
210+
// XDOM
204211
// Yes this is hacky. I'm kind of annoyed that I can't appendChild
205212
// a doctype to DOMDocument. Maybe I haven't chanted the right
206213
// syllables.
@@ -363,6 +370,7 @@ public function emitToken($token, $mode = null) {
363370
}elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
364371
/* Append a Comment node to the Document object with the data
365372
attribute set to the data given in the comment token. */
373+
// XDOM
366374
$comment =$this->dom->createComment($token['data']);
367375
$this->dom->appendChild($comment);
368376

@@ -378,6 +386,7 @@ public function emitToken($token, $mode = null) {
378386
/* Create an element for the token in the HTML namespace. Append it
379387
* to the Document object. Put this element in the stack of open
380388
* elements. */
389+
// XDOM
381390
$html =$this->insertElement($token,false);
382391
$this->dom->appendChild($html);
383392
$this->stack[] =$html;
@@ -387,6 +396,7 @@ public function emitToken($token, $mode = null) {
387396
}else {
388397
/* Create an html element. Append it to the Document object. Put
389398
* this element in the stack of open elements. */
399+
// XDOM
390400
$html =$this->dom->createElementNS(self::NS_HTML,'html');
391401
$this->dom->appendChild($html);
392402
$this->stack[] =$html;
@@ -1744,6 +1754,7 @@ public function emitToken($token, $mode = null) {
17441754
* elements with an entry for the new element, and
17451755
* let node be the new element. */
17461756
// we don't know what the token is anymore
1757+
// XDOM
17471758
$clone =$node->cloneNode();
17481759
$a_pos =array_search($node,$this->a_formatting,true);
17491760
$s_pos =array_search($node,$this->stack,true);
@@ -1753,10 +1764,12 @@ public function emitToken($token, $mode = null) {
17531764

17541765
/* 6.6 Insert last node into node, first removing
17551766
it from its previous parent node if any. */
1767+
// XDOM
17561768
if($last_node->parentNode !==null) {
17571769
$last_node->parentNode->removeChild($last_node);
17581770
}
17591771

1772+
// XDOM
17601773
$node->appendChild($last_node);
17611774

17621775
/* 6.7 Let last node be node. */
@@ -1770,6 +1783,7 @@ public function emitToken($token, $mode = null) {
17701783
* whatever last node ended up being in the previous
17711784
* step, first removing it from its previous parent
17721785
* node if any. */
1786+
// XDOM
17731787
if ($last_node->parentNode) {// common step
17741788
$last_node->parentNode->removeChild($last_node);
17751789
}
@@ -1780,23 +1794,27 @@ public function emitToken($token, $mode = null) {
17801794
* first removing it from its previous parent node if
17811795
* any. */
17821796
}else {
1797+
// XDOM
17831798
$common_ancestor->appendChild($last_node);
17841799
}
17851800

17861801
/* 8. Create an element for the token for which the
17871802
* formatting element was created. */
1803+
// XDOM
17881804
$clone =$formatting_element->cloneNode();
17891805

17901806
/* 9. Take all of the child nodes of the furthest
17911807
block and append them to the element created in the
17921808
last step. */
1809+
// XDOM
17931810
while($furthest_block->hasChildNodes()) {
17941811
$child =$furthest_block->firstChild;
17951812
$furthest_block->removeChild($child);
17961813
$clone->appendChild($child);
17971814
}
17981815

17991816
/* 10. Append that clone to the furthest block. */
1817+
// XDOM
18001818
$furthest_block->appendChild($clone);
18011819

18021820
/* 11. Remove the formatting element from the list
@@ -1940,17 +1958,21 @@ public function emitToken($token, $mode = null) {
19401958
caseself::IN_TABLE:
19411959
$clear =array('html','table');
19421960

1943-
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
1944-
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1945-
or U+0020 SPACE */
1946-
if($token['type'] === HTML5_Tokenizer::SPACECHARACTER &&
1947-
/* If the current table is tainted, then act as described in
1948-
* the "anything else" entry below. */
1949-
// Note: hsivonen has a test that fails due to this line
1950-
// because he wants to convince Hixie not to do taint
1951-
!$this->currentTableIsTainted()) {
1952-
/* Append the character to the current node. */
1953-
$this->insertText($token['data']);
1961+
/* A character token */
1962+
if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
1963+
$token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
1964+
/* Let the pending table character tokens
1965+
* be an empty list of tokens. */
1966+
$this->pendingTableCharacters ="";
1967+
$this->pendingTableCharactersDirty =false;
1968+
/* Let the original insertion mode be the current
1969+
* insertion mode. */
1970+
$this->original_mode =$this->mode;
1971+
/* Switch the insertion mode to
1972+
* "in table text" and
1973+
* reprocess the token. */
1974+
$this->mode =self::IN_TABLE_TEXT;
1975+
$this->emitToken($token);
19541976

19551977
/* A comment token */
19561978
}elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
@@ -2096,6 +2118,57 @@ public function emitToken($token, $mode = null) {
20962118
}
20972119
break;
20982120

2121+
caseself::IN_TABLE_TEXT:
2122+
/* A character token */
2123+
if($token['type'] === HTML5_Tokenizer::CHARACTER) {
2124+
/* Append the character token to the pending table
2125+
* character tokens list. */
2126+
$this->pendingTableCharacters .=$token['data'];
2127+
$this->pendingTableCharactersDirty =true;
2128+
}elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2129+
$this->pendingTableCharacters .=$token['data'];
2130+
/* Anything else */
2131+
}else {
2132+
if ($this->pendingTableCharacters !=='' &&is_string($this->pendingTableCharacters)) {
2133+
/* If any of the tokens in the pending table character tokens list
2134+
* are character tokens that are not one of U+0009 CHARACTER
2135+
* TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
2136+
* U+0020 SPACE, then reprocess those character tokens using the
2137+
* rules given in the "anything else" entry in the "in table"
2138+
* insertion mode.*/
2139+
if ($this->pendingTableCharactersDirty) {
2140+
/* Parse error. Process the token using the rules for the
2141+
* "in body" insertion mode, except that if the current
2142+
* node is a table, tbody, tfoot, thead, or tr element,
2143+
* then, whenever a node would be inserted into the current
2144+
* node, it must instead be foster parented. */
2145+
// XERROR
2146+
$old =$this->foster_parent;
2147+
$this->foster_parent =true;
2148+
$text_token =array(
2149+
'type' => HTML5_Tokenizer::CHARACTER,
2150+
'data' =>$this->pendingTableCharacters,
2151+
);
2152+
$this->processWithRulesFor($text_token,self::IN_BODY);
2153+
$this->foster_parent =$old;
2154+
2155+
/* Otherwise, insert the characters given by the pending table
2156+
* character tokens list into the current node. */
2157+
}else {
2158+
$this->insertText($this->pendingTableCharacters);
2159+
}
2160+
$this->pendingTableCharacters =null;
2161+
$this->pendingTableCharactersNull =null;
2162+
}
2163+
2164+
/* Switch the insertion mode to the original insertion mode and
2165+
* reprocess the token.
2166+
*/
2167+
$this->mode =$this->original_mode;
2168+
$this->emitToken($token);
2169+
}
2170+
break;
2171+
20992172
caseself::IN_CAPTION:
21002173
/* An end tag whose tag name is "caption" */
21012174
if($token['type'] === HTML5_Tokenizer::ENDTAG &&$token['name'] ==='caption') {
@@ -2694,6 +2767,7 @@ public function emitToken($token, $mode = null) {
26942767
// XERROR: parse error
26952768
}elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
26962769
$token['name'] ==='script' &&end($this->stack)->tagName ==='script' &&
2770+
// XDOM
26972771
end($this->stack)->namespaceURI ===self::NS_SVG) {
26982772
array_pop($this->stack);
26992773
// a bunch of script running mumbo jumbo
@@ -2702,20 +2776,23 @@ public function emitToken($token, $mode = null) {
27022776
((
27032777
$token['name'] !=='mglyph' &&
27042778
$token['name'] !=='malignmark' &&
2779+
// XDOM
27052780
end($this->stack)->namespaceURI ===self::NS_MATHML &&
27062781
in_array(end($this->stack)->tagName,array('mi','mo','mn','ms','mtext'))
27072782
) ||
27082783
(
27092784
$token['name'] ==='svg' &&
2785+
// XDOM
27102786
end($this->stack)->namespaceURI ===self::NS_MATHML &&
27112787
end($this->stack)->tagName ==='annotation-xml'
27122788
) ||
27132789
(
2790+
// XDOM
27142791
end($this->stack)->namespaceURI ===self::NS_SVG &&
27152792
in_array(end($this->stack)->tagName,array('foreignObject','desc','title'))
27162793
) ||
27172794
(
2718-
// XSKETCHY
2795+
// XSKETCHY && XDOM
27192796
end($this->stack)->namespaceURI ===self::NS_HTML
27202797
))
27212798
) ||$token['type'] === HTML5_Tokenizer::ENDTAG
@@ -2729,6 +2806,7 @@ public function emitToken($token, $mode = null) {
27292806
$found =false;
27302807
// this basically duplicates elementInScope()
27312808
for ($i =count($this->stack) -1;$i >=0;$i--) {
2809+
// XDOM
27322810
$node =$this->stack[$i];
27332811
if ($node->namespaceURI !==self::NS_HTML) {
27342812
$found =true;
@@ -2756,6 +2834,7 @@ public function emitToken($token, $mode = null) {
27562834
// XERROR: parse error
27572835
do {
27582836
$node =array_pop($this->stack);
2837+
// XDOM
27592838
}while ($node->namespaceURI !==self::NS_HTML);
27602839
$this->stack[] =$node;
27612840
$this->mode =$this->secondary_mode;
@@ -2799,6 +2878,7 @@ public function emitToken($token, $mode = null) {
27992878
'radialgradient' =>'radialGradient',
28002879
'textpath' =>'textPath',
28012880
);
2881+
// XDOM
28022882
$current =end($this->stack);
28032883
if ($current->namespaceURI ===self::NS_MATHML) {
28042884
$token =$this->adjustMathMLAttributes($token);
@@ -2835,6 +2915,7 @@ public function emitToken($token, $mode = null) {
28352915
/* Append a Comment node to the first element in the stack of open
28362916
elements (the html element), with the data attribute set to the
28372917
data given in the comment token. */
2918+
// XDOM
28382919
$comment =$this->dom->createComment($token['data']);
28392920
$this->stack[0]->appendChild($comment);
28402921

@@ -2985,6 +3066,7 @@ public function emitToken($token, $mode = null) {
29853066
if($token['type'] === HTML5_Tokenizer::COMMENT) {
29863067
/* Append a Comment node to the Document object with the data
29873068
attribute set to the data given in the comment token. */
3069+
// XDOM
29883070
$comment =$this->dom->createComment($token['data']);
29893071
$this->dom->appendChild($comment);
29903072

@@ -3008,6 +3090,7 @@ public function emitToken($token, $mode = null) {
30083090
if($token['type'] === HTML5_Tokenizer::COMMENT) {
30093091
/* Append a Comment node to the Document object with the data
30103092
attribute set to the data given in the comment token. */
3093+
// XDOM
30113094
$comment =$this->dom->createComment($token['data']);
30123095
$this->dom->appendChild($comment);
30133096

@@ -3458,12 +3541,8 @@ private function getFosterParent() {
34583541
publicfunctionfosterParent($node) {
34593542
$foster_parent =$this->getFosterParent();
34603543
$table =$this->getCurrentTable();// almost equivalent to last table element, except it can be html
3461-
/* When a node node is to be foster parented, the node node must be
3462-
* inserted into the foster parent element, and the current table must
3463-
* be marked as tainted. (Once the current table has been tainted,
3464-
* whitespace characters are inserted into the foster parent element
3465-
* instead of the current node.) */
3466-
$table->tainted =true;
3544+
/* When a node node is to be foster parented, the node node must be
3545+
* inserted into the foster parent element. */
34673546
/* If the foster parent element is the parent element of the last table
34683547
* element in the stack of open elements, then node must be inserted
34693548
* immediately before the last table element in the stack of open

‎tests/HTML5/TestDataTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ class HTML5_TestDataTest extends UnitTestCase
77
functiontestSample() {
88
$data =newHTML5_TestData(dirname(__FILE__) .'/TestDataTest/sample.dat');
99
$this->assertIdentical($data->tests,array(
10-
array('data' =>"Foo\n",'des' =>"Bar\n"),
11-
array('data' =>"Foo\n")
10+
array('data' =>"Foo",'des' =>"Bar"),
11+
array('data' =>"Foo")
1212
));
1313
}
1414
functiontestStrDom() {

‎tests/HTML5/TokenizerPositionTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ class HTML5_PositionTestableTokenizer extends HTML5_TestableTokenizer
77
public$outputLines =array();
88
public$outputCols =array();
99
private$characterTokens =array();
10-
protectedfunctionemitToken($token,$checkStream =true) {
11-
parent::emitToken($token,$checkStream);
10+
protectedfunctionemitToken($token,$checkStream =true,$dry =false) {
11+
parent::emitToken($token,$checkStream,$dry);
1212
// XXX: The tests should really include the parse errors, but I'm lazy.
1313
switch ($token['type']) {
1414
caseself::PARSEERROR:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp