Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6b40e76

Browse files
author
Edward Z. Yang
committed
Initial implementation of numeric entities and tests, not complete, need spec clarification.
--HG--
branch : numeric-entities
1 parent 1cbacc5, commit 6b40e76

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

‎library/HTML5/Data.php

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,13 @@ class HTML5_Data
88
// at some point this should be moved to a .ser file. Another
99
// possible optimization is to give UTF-8 bytes, not Unicode
1010
// codepoints
11+
// XXX: Not quite sure why it's named this; this is
12+
// actually the numeric entity dereference table.
1113
protectedstatic$realCodepointTable =array(
14+
0x00 =>0xFFFD,// REPLACEMENT CHARACTER
1215
0x0D =>0x000A,// LINE FEED (LF)
1316
0x80 =>0x20AC,// EURO SIGN ('€')
14-
0x81 =>0xFFFD,//REPLACEMENT CHARACTER
17+
0x81 =>0x0081,//<control>
1518
0x82 =>0x201A,// SINGLE LOW-9 QUOTATION MARK ('‚')
1619
0x83 =>0x0192,// LATIN SMALL LETTER F WITH HOOK ('ƒ')
1720
0x84 =>0x201E,// DOUBLE LOW-9 QUOTATION MARK ('„')
@@ -23,10 +26,10 @@ class HTML5_Data
2326
0x8A =>0x0160,// LATIN CAPITAL LETTER S WITH CARON ('Š')
2427
0x8B =>0x2039,// SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹')
2528
0x8C =>0x0152,// LATIN CAPITAL LIGATURE OE ('Œ')
26-
0x8D =>0xFFFD,//REPLACEMENT CHARACTER
29+
0x8D =>0x008D,//<control>
2730
0x8E =>0x017D,// LATIN CAPITAL LETTER Z WITH CARON ('Ž')
28-
0x8F =>0xFFFD,//REPLACEMENT CHARACTER
29-
0x90 =>0xFFFD,//REPLACEMENT CHARACTER
31+
0x8F =>0x008F,//<control>
32+
0x90 =>0x0090,//<control>
3033
0x91 =>0x2018,// LEFT SINGLE QUOTATION MARK ('‘')
3134
0x92 =>0x2019,// RIGHT SINGLE QUOTATION MARK ('’')
3235
0x93 =>0x201C,// LEFT DOUBLE QUOTATION MARK ('“')
@@ -39,7 +42,7 @@ class HTML5_Data
3942
0x9A =>0x0161,// LATIN SMALL LETTER S WITH CARON ('š')
4043
0x9B =>0x203A,// SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›')
4144
0x9C =>0x0153,// LATIN SMALL LIGATURE OE ('œ')
42-
0x9D =>0xFFFD,//REPLACEMENT CHARACTER
45+
0x9D =>0x009D,//<control>
4346
0x9E =>0x017E,// LATIN SMALL LETTER Z WITH CARON ('ž')
4447
0x9F =>0x0178,// LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
4548
);
@@ -71,12 +74,13 @@ public static function getNamedCharacterReferences() {
7174
* shamelessly stolen from Feyd (which is in public domain).
7275
*/
7376
publicstaticfunctionutf8chr($code) {
74-
if($code >0x10FFFFor$code <0x0or
77+
/* We don't care: we live dangerously
78+
* if($code > 0x10FFFF or $code < 0x0 or
7579
($code >= 0xD800 and $code <= 0xDFFF) ) {
7680
// bits are set outside the "valid" range as defined
7781
// by UNICODE 4.1.0
7882
return "\xEF\xBF\xBD";
79-
}
83+
}*/
8084

8185
$x =$y =$z =$w =0;
8286
if ($code <0x80) {

‎library/HTML5/Tokenizer.php

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2213,6 +2213,16 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
22132213
));
22142214
$codepoint =$new_codepoint;
22152215
}else {
2216+
// our logic is structured a little differently from the
2217+
// spec's but they're equivalent. The transform is:
2218+
// spec:
2219+
// return character for codepoint
2220+
// if in range:
2221+
// parse error
2222+
// ours:
2223+
// if in range:
2224+
// parse error
2225+
// return character for codepoint
22162226
/* Otherwise, if the number is in the range 0x0000 to 0x0008,
22172227
U+000B, U+000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF ,
22182228
0xFDD0 to 0xFDEF, or is one of 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF,
@@ -2238,7 +2248,6 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
22382248
'type' =>self::PARSEERROR,
22392249
'data' =>'illegal-codepoint-for-numeric-entity'
22402250
));
2241-
$codepoint =0xFFFD;
22422251
}
22432252
}
22442253

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp