Notifications: You must be signed in to change notification settings
Fork: 69
Star: 96

Commit 18e4601

committed

Fix more errors, and change implementation to be the most recent one.

--HG--branch : numeric-entities

1 parent: 6b40e76 · commit: 18e4601 (Copy full SHA for 18e4601)

File tree

1 file changed

+24

-26

lines changed

library/HTML5
- Tokenizer.php

1 file changed

+24

-26

lines changed

`‎library/HTML5/Tokenizer.php`

Lines changed: 24 additions & 26 deletions

Original file line number	Diff line number	Diff line change
`@@ -2211,28 +2211,29 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {`
`2211`	`2211`	`'type' =>self::PARSEERROR,`
`2212`	`2212`	`'data' =>'illegal-windows-1252-entity'`
`2213`	`2213`	`));`
`2214`		`-$codepoint =$new_codepoint;`
	`2214`	`+return HTML5_Data::utf8chr($new_codepoint);`
`2215`	`2215`	`}else {`
`2216`		`-// our logic is structured a little differently from the`
`2217`		`-// spec's but they're equivalent. The transform is:`
`2218`		`-// spec:`
`2219`		`-// return character for codepoint`
`2220`		`-// if in range:`
`2221`		`-// parse error`
`2222`		`-// ours:`
`2223`		`-// if in range:`
`2224`		`-// parse error`
`2225`		`-// return character for codepoint`
`2226`		`-/* Otherwise, if the number is in the range 0x0000 to 0x0008,`
`2227`		`- U+000B, U+000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF ,`
`2228`		`- 0xFDD0 to 0xFDEF, or is one of 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF,`
`2229`		`- 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,`
`2230`		`- 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF,`
`2231`		`- 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE,`
`2232`		`- 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,`
`2233`		`- 0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this`
`2234`		`- is a parse error; return a character token for the U+FFFD`
`2235`		`- REPLACEMENT CHARACTER character instead. */`
	`2216`	`+/* Otherwise, if the number is greater than 0x10FFFF, then`
	`2217`	`+ * this is a parse error. Return a U+FFFD REPLACEMENT`
	`2218`	`+ * CHARACTER. */`
	`2219`	`+if ($codepoint >0x10FFFF) {`
	`2220`	`+$this->emitToken(array(`
	`2221`	`+'type' =>self::PARSEERROR,`
	`2222`	`+'data' =>'overlong-character-entity'// XXX probably not correct`
	`2223`	`+ ));`
	`2224`	`+return"\xEF\xBF\xBD";`
	`2225`	`+ }`
	`2226`	`+/* Otherwise, return a character token for the Unicode`
	`2227`	`+ * character whose code point is that number. If the`
	`2228`	`+ * number is in the range 0x0001 to 0x0008, 0x000E to`
	`2229`	`+ * 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to`
	`2230`	`+ * 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,`
	`2231`	`+ * 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE,`
	`2232`	`+ * 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,`
	`2233`	`+ * 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE,`
	`2234`	`+ * 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,`
	`2235`	`+ * 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE,`
	`2236`	`+ * or 0x10FFFF, then this is a parse error. */`
`2236`	`2237`	`// && has higher precedence than \|\|`
`2237`	`2238`	`if (`
`2238`	`2239`	`$codepoint >=0x0000 &&$codepoint <=0x0008 \|\|`
`@@ -2242,18 +2243,15 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {`
`2242`	`2243`	`$codepoint >=0xD800 &&$codepoint <=0xDFFF \|\|`
`2243`	`2244`	`$codepoint >=0xFDD0 &&$codepoint <=0xFDEF \|\|`
`2244`	`2245`	`($codepoint &0xFFFE) ===0xFFFE \|\|`
`2245`		`-$codepoint>0x10FFFF`
	`2246`	`+$codepoint==0x10FFFF \|\|$codepoint ==0x10FFFE`
`2246`	`2247`	`) {`
`2247`	`2248`	`$this->emitToken(array(`
`2248`	`2249`	`'type' =>self::PARSEERROR,`
`2249`	`2250`	`'data' =>'illegal-codepoint-for-numeric-entity'`
`2250`	`2251`	`));`
`2251`	`2252`	`}`
	`2253`	`+return HTML5_Data::utf8chr($codepoint);`
`2252`	`2254`	`}`
`2253`		`-`
`2254`		`-/* Otherwise, return a character token for the Unicode`
`2255`		`- character whose code point is that number. */`
`2256`		`-return HTML5_Data::utf8chr($codepoint);`
`2257`	`2255`	`}`
`2258`	`2256`
`2259`	`2257`	`}else {`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit18e4601

File tree

1 file changed

1 file changed

`‎library/HTML5/Tokenizer.php`

0 commit comments