Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 18e4601

Browse files
committed
Fix more errors, and change implementation to be the most recent one.
--HG--
branch : numeric-entities
1 parent 6b40e76, commit 18e4601

File tree

1 file changed

+24
-26
lines changed

1 file changed

+24
-26
lines changed

‎library/HTML5/Tokenizer.php

Lines changed: 24 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -2211,28 +2211,29 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
22112211
'type' =>self::PARSEERROR,
22122212
'data' =>'illegal-windows-1252-entity'
22132213
));
2214-
$codepoint =$new_codepoint;
2214+
return HTML5_Data::utf8chr($new_codepoint);
22152215
}else {
2216-
// our logic is structured a little differently from the
2217-
// spec's but they're equivalent. The transform is:
2218-
// spec:
2219-
// return character for codepoint
2220-
// if in range:
2221-
// parse error
2222-
// ours:
2223-
// if in range:
2224-
// parse error
2225-
// return character for codepoint
2226-
/* Otherwise, if the number is in the range 0x0000 to 0x0008,
2227-
U+000B, U+000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF ,
2228-
0xFDD0 to 0xFDEF, or is one of 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF,
2229-
0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
2230-
0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF,
2231-
0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE,
2232-
0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
2233-
0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this
2234-
is a parse error; return a character token for the U+FFFD
2235-
REPLACEMENT CHARACTER character instead. */
2216+
/* Otherwise, if the number is greater than 0x10FFFF, then
2217+
* this is a parse error. Return a U+FFFD REPLACEMENT
2218+
* CHARACTER. */
2219+
if ($codepoint >0x10FFFF) {
2220+
$this->emitToken(array(
2221+
'type' =>self::PARSEERROR,
2222+
'data' =>'overlong-character-entity'// XXX probably not correct
2223+
));
2224+
return"\xEF\xBF\xBD";
2225+
}
2226+
/* Otherwise, return a character token for the Unicode
2227+
* character whose code point is that number. If the
2228+
* number is in the range 0x0001 to 0x0008, 0x000E to
2229+
* 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to
2230+
* 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
2231+
* 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE,
2232+
* 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
2233+
* 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE,
2234+
* 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
2235+
* 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE,
2236+
* or 0x10FFFF, then this is a parse error. */
22362237
// && has higher precedence than ||
22372238
if (
22382239
$codepoint >=0x0000 &&$codepoint <=0x0008 ||
@@ -2242,18 +2243,15 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
22422243
$codepoint >=0xD800 &&$codepoint <=0xDFFF ||
22432244
$codepoint >=0xFDD0 &&$codepoint <=0xFDEF ||
22442245
($codepoint &0xFFFE) ===0xFFFE ||
2245-
$codepoint>0x10FFFF
2246+
$codepoint==0x10FFFF ||$codepoint ==0x10FFFE
22462247
) {
22472248
$this->emitToken(array(
22482249
'type' =>self::PARSEERROR,
22492250
'data' =>'illegal-codepoint-for-numeric-entity'
22502251
));
22512252
}
2253+
return HTML5_Data::utf8chr($codepoint);
22522254
}
2253-
2254-
/* Otherwise, return a character token for the Unicode
2255-
character whose code point is that number. */
2256-
return HTML5_Data::utf8chr($codepoint);
22572255
}
22582256

22592257
}else {

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp