Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9799e6f

Browse files
author
Edward Z. Yang
committed
Implement fragment parsing, fix a bunch more bugs.
1 parent 0adc0fe, commit 9799e6f

File tree

4 files changed

+114
-25
lines changed

4 files changed

+114
-25
lines changed

‎library/HTML5/Tokenizer.php

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,15 @@ public function __construct($data) {
9696
$this->content_model =self::PCDATA;
9797
}
9898

99+
public function parseFragment($context = null) {
100+
$this->tree->setupContext($context);
101+
if ($this->tree->content_model) {
102+
$this->content_model =$this->tree->content_model;
103+
$this->tree->content_model =null;
104+
}
105+
$this->parse();
106+
}
107+
99108
// XXX maybe convert this into an iterator? regardless, this function
100109
// and the save function should go into a Parser facade of some sort
101110
/**

‎library/HTML5/TreeConstructer.php

Lines changed: 86 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ class HTML5_TreeConstructer {
5858
// code can check for (bool)$ignore_lf_token, but it phases out
5959
// appropriately)
6060
private $ignore_lf_token = 0;
61+
private $fragment = false;
62+
private $root;
6163

6264
// XFOREIGN: SVG's foreignObject is included in scoping
6365
private$scoping =array('applet','button','caption','html','marquee','object','table','td','th');
@@ -1955,6 +1957,7 @@ public function emitToken($token, $mode = null) {
19551957
scope with the same tag name as the token, this is a parse error.
19561958
Ignore the token. (fragment case) */
19571959
if(!$this->elementInScope($token['name'],true)) {
1960+
$this->ignored =true;
19581961
// Ignore
19591962

19601963
/* Otherwise: */
@@ -2999,7 +3002,7 @@ private function clearStackToTableContext($elements) {
29993002
}
30003003
}
30013004

3002-
private function resetInsertionMode() {
3005+
private function resetInsertionMode($context = null) {
30033006
/* 1. Let last be false. */
30043007
$last =false;
30053008
$leng =count($this->stack);
@@ -3008,16 +3011,16 @@ private function resetInsertionMode() {
30083011
/* 2. Let node be the last node in the stack of open elements. */
30093012
$node =$this->stack[$n];
30103013

3011-
/* 3. If node is the first node in the stack of open elements, then
3012-
set last to true. If the element whose innerHTML attribute is being
3013-
set is neither a td element nor a th element, then set node to the
3014-
element whose innerHTML attribute is being set. (innerHTML case) */
3014+
/* 3. If node is the first node in the stack of open elements, then
3015+
* set last to true and set node to the context element. (fragment
3016+
* case) */
30153017
if($this->stack[0]->isSameNode($node)) {
30163018
$last =true;
3019+
$node =$context;
30173020
}
30183021

30193022
/* 4. If node is a select element, then switch the insertion mode to
3020-
"in select" and abort these steps. (innerHTML case) */
3023+
"in select" and abort these steps. (fragment case) */
30213024
if($node->tagName ==='select') {
30223025
$this->mode =self::IN_SELECT;
30233026
break;
@@ -3037,7 +3040,7 @@ private function resetInsertionMode() {
30373040
/* 7. If node is a tbody, thead, or tfoot element, then switch the
30383041
insertion mode to "in table body" and abort these steps. */
30393042
}elseif(in_array($node->tagName,array('tbody','thead','tfoot'))) {
3040-
$this->mode =self::IN_TBODY;
3043+
$this->mode =self::IN_TABLE_BODY;
30413044
break;
30423045

30433046
/* 8. If node is a caption element, then switch the insertion mode
@@ -3049,7 +3052,7 @@ private function resetInsertionMode() {
30493052
/* 9. If node is a colgroup element, then switch the insertion mode
30503053
to "in column group" and abort these steps. (innerHTML case) */
30513054
}elseif($node->tagName ==='colgroup') {
3052-
$this->mode =self::IN_CGROUP;
3055+
$this->mode =self::IN_COLUMN_GROUP;
30533056
break;
30543057

30553058
/* 10. If node is a table element, then switch the insertion mode
@@ -3058,38 +3061,44 @@ private function resetInsertionMode() {
30583061
$this->mode =self::IN_TABLE;
30593062
break;
30603063

3061-
/* 11. If node is a head element, then switch the insertion mode
3064+
/* 11. If node is an element from the MathML namespace or the SVG
3065+
* namespace, then switch the insertion mode to "in foreign
3066+
* content", let the secondary insertion mode be "in body", and
3067+
* abort these steps. */
3068+
// XFOREIGN: implement me
3069+
3070+
/* 12. If node is a head element, then switch the insertion mode
30623071
to "in body" ("in body"! not "in head"!) and abort these steps.
3063-
(innerHTML case) */
3072+
(fragment case) */
30643073
}elseif($node->tagName ==='head') {
30653074
$this->mode =self::IN_BODY;
30663075
break;
30673076

3068-
/*12. If node is a body element, then switch the insertion mode to
3077+
/*13. If node is a body element, then switch the insertion mode to
30693078
"in body" and abort these steps. */
30703079
}elseif($node->tagName ==='body') {
30713080
$this->mode =self::IN_BODY;
30723081
break;
30733082

3074-
/*13. If node is a frameset element, then switch the insertion
3075-
mode to "in frameset" and abort these steps. (innerHTML case) */
3083+
/*14. If node is a frameset element, then switch the insertion
3084+
mode to "in frameset" and abort these steps. (fragment case) */
30763085
}elseif($node->tagName ==='frameset') {
3077-
$this->mode =self::IN_FRAME;
3086+
$this->mode =self::IN_FRAMESET;
30783087
break;
30793088

3080-
/*14. If node is an html element, then: if the head element
3089+
/*15. If node is an html element, then: if the head element
30813090
pointer is null, switch the insertion mode to "before head",
30823091
otherwise, switch the insertion mode to "after head". In either
3083-
case, abort these steps. (innerHTML case) */
3092+
case, abort these steps. (fragment case) */
30843093
}elseif($node->tagName ==='html') {
30853094
$this->mode = ($this->head_pointer ===null)
3086-
?self::BEFOR_HEAD
3095+
?self::BEFORE_HEAD
30873096
:self::AFTER_HEAD;
30883097

30893098
break;
30903099

3091-
/*15. If last is true, then set the insertion mode to "in body"
3092-
and abort these steps. (innerHTML case) */
3100+
/*16. If last is true, then set the insertion mode to "in body"
3101+
and abort these steps. (fragment case) */
30933102
}elseif($last) {
30943103
$this->mode =self::IN_BODY;
30953104
break;
@@ -3237,9 +3246,66 @@ public function currentTableIsTainted() {
32373246
return !empty($this->getCurrentTable()->tainted);
32383247
}
32393248

3249+
/**
3250+
* Sets up the tree constructor for building a fragment.
3251+
*/
3252+
public function setupContext($context = null) {
3253+
$this->fragment =true;
3254+
$context =$this->dom->createElement($context);
3255+
if ($context) {
3256+
/* 4.1. Set the HTML parser's tokenization stage's content model
3257+
* flag according to the context element, as follows: */
3258+
switch ($context->tagName) {
3259+
case 'title': case 'textarea':
3260+
$this->content_model = HTML5_Tokenizer::RCDATA;
3261+
break;
3262+
case 'style': case 'script': case 'xmp': case 'iframe':
3263+
case 'noembed': case 'noframes':
3264+
$this->content_model = HTML5_Tokenizer::CDATA;
3265+
break;
3266+
case 'noscript':
3267+
// XSCRIPT: assuming scripting is enabled
3268+
$this->content_model = HTML5_Tokenizer::CDATA;
3269+
break;
3270+
case 'plaintext':
3271+
$this->content_model = HTML5_Tokenizer::PLAINTEXT;
3272+
break;
3273+
}
3274+
/* 4.2. Let root be a new html element with no attributes. */
3275+
$root =$this->dom->createElement('html');
3276+
$this->root =$root;
3277+
/* 4.3 Append the element root to the Document node created above. */
3278+
$this->dom->appendChild($root);
3279+
/* 4.4 Set up the parser's stack of open elements so that it
3280+
* contains just the single element root. */
3281+
$this->stack =array($root);
3282+
/* 4.5 Reset the parser's insertion mode appropriately. */
3283+
$this->resetInsertionMode($context);
3284+
/* 4.6 Set the parser's form element pointer to the nearest node
3285+
* to the context element that is a form element (going straight up
3286+
* the ancestor chain, and including the element itself, if it is a
3287+
* form element), or, if there is no such form element, to null. */
3288+
$node =$context;
3289+
do {
3290+
if ($node->tagName ==='form') {
3291+
$this->form_pointer =$node;
3292+
break;
3293+
}
3294+
}while ($node =$node->parentNode);
3295+
}
3296+
}
3297+
32403298

32413299
public function save() {
3242-
return $this->dom;
3300+
if (!$this->fragment) {
3301+
return $this->dom;
3302+
}else {
3303+
if ($this->root) {
3304+
return $this->root->childNodes;
3305+
}else {
3306+
return $this->dom->childNodes;
3307+
}
3308+
}
32433309
}
32443310
}
32453311

‎tests/HTML5/TestData.php

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,14 +70,19 @@ public function __construct($filename) {
7070
/**
7171
* Converts a DOMDocument into string form as seen in test cases.
7272
*/
73-
public static function strDom($dom, $prefix = '| ') {
73+
public static function strDom($node, $prefix = '| ') {
7474
// XXX: Doesn't handle svg and math correctly
7575
$ret =array();
7676
$indent =2;
7777
$level = -1;// since DOMDocument doesn't get rendered
7878
$skip =false;
79-
$next =$dom;
79+
$next =$node;
8080
while ($next) {
81+
if ($next instanceof DOMNodeList) {
82+
if (!$next->length)break;
83+
$next =$next->item(0);
84+
$level =0;
85+
}
8186
$text =false;
8287
$subnodes =array();
8388
switch ($next->nodeType) {
@@ -130,6 +135,7 @@ public static function strDom($dom, $prefix = '| ') {
130135
$next =$next->parentNode;
131136
$level--;
132137
$skip =true;
138+
if ($level <0)break;
133139
}else {
134140
$next =false;
135141
}

‎tests/HTML5/TreeBuilderTest.php

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,20 +5,28 @@
55
SimpleTest::ignore('HTML5_TreeBuilderHarness');
66
class HTML5_TreeBuilderHarness extends HTML5_TestDataHarness
77
{
8-
public function assertIdentical($expect, $actual, $input = '%s') {
8+
public function assertIdentical($expect, $actual, $test = array()) {
9+
$input =$test['data'];
10+
if (isset($test['document-fragment'])) {
11+
$input .="\nFragment:" .$test['document-fragment'];
12+
}
913
parent::assertIdentical($expect,$actual,"Identical expectation failed\nInput:\n$input\n\nExpected:\n$expect\n\nActual:\n$actual\n");
1014
}
1115
public function invoke($test) {
1216
// this is totally the wrong interface to use, but
1317
// for now we need testing
1418
$tokenizer = new HTML5_Tokenizer($test['data']);
1519
$GLOBALS['TIME'] -=get_microtime();
16-
$tokenizer->parse();
20+
if (isset($test['document-fragment'])) {
21+
$tokenizer->parseFragment($test['document-fragment']);
22+
}else {
23+
$tokenizer->parse();
24+
}
1725
$GLOBALS['TIME'] +=get_microtime();
1826
$this->assertIdentical(
1927
$test['document'],
2028
HTML5_TestData::strDom($tokenizer->save()),
21-
$test['data']
29+
$test
2230
);
2331
}
2432
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp