@@ -8,10 +8,13 @@ class HTML5_Data
8
8
// at some point this should be moved to a .ser file. Another
9
9
// possible optimization is to give UTF-8 bytes, not Unicode
10
10
// codepoints
11
+ // XXX: Not quite sure why it's named this; this is
12
+ // actually the numeric entity dereference table.
11
13
protected static $ realCodepointTable =array (
14
+ 0x00 =>0xFFFD ,// REPLACEMENT CHARACTER
12
15
0x0D =>0x000A ,// LINE FEED (LF)
13
16
0x80 =>0x20AC ,// EURO SIGN ('€')
14
- 0x81 =>0xFFFD ,//REPLACEMENT CHARACTER
17
+ 0x81 =>0x0081 ,//<control>
15
18
0x82 =>0x201A ,// SINGLE LOW-9 QUOTATION MARK ('‚')
16
19
0x83 =>0x0192 ,// LATIN SMALL LETTER F WITH HOOK ('ƒ')
17
20
0x84 =>0x201E ,// DOUBLE LOW-9 QUOTATION MARK ('„')
@@ -23,10 +26,10 @@ class HTML5_Data
23
26
0x8A =>0x0160 ,// LATIN CAPITAL LETTER S WITH CARON ('Š')
24
27
0x8B =>0x2039 ,// SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹')
25
28
0x8C =>0x0152 ,// LATIN CAPITAL LIGATURE OE ('Œ')
26
- 0x8D =>0xFFFD ,//REPLACEMENT CHARACTER
29
+ 0x8D =>0x008D ,//<control>
27
30
0x8E =>0x017D ,// LATIN CAPITAL LETTER Z WITH CARON ('Ž')
28
- 0x8F =>0xFFFD ,//REPLACEMENT CHARACTER
29
- 0x90 =>0xFFFD ,//REPLACEMENT CHARACTER
31
+ 0x8F =>0x008F ,//<control>
32
+ 0x90 =>0x0090 ,//<control>
30
33
0x91 =>0x2018 ,// LEFT SINGLE QUOTATION MARK ('‘')
31
34
0x92 =>0x2019 ,// RIGHT SINGLE QUOTATION MARK ('’')
32
35
0x93 =>0x201C ,// LEFT DOUBLE QUOTATION MARK ('“')
@@ -39,7 +42,7 @@ class HTML5_Data
39
42
0x9A =>0x0161 ,// LATIN SMALL LETTER S WITH CARON ('š')
40
43
0x9B =>0x203A ,// SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›')
41
44
0x9C =>0x0153 ,// LATIN SMALL LIGATURE OE ('œ')
42
- 0x9D =>0xFFFD ,//REPLACEMENT CHARACTER
45
+ 0x9D =>0x009D ,//<control>
43
46
0x9E =>0x017E ,// LATIN SMALL LETTER Z WITH CARON ('ž')
44
47
0x9F =>0x0178 ,// LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
45
48
);
@@ -71,12 +74,13 @@ public static function getNamedCharacterReferences() {
71
74
* shamelessly stolen from Feyd (which is in public domain).
72
75
*/
73
76
public static function utf8chr ($ code ) {
74
- if ($ code >0x10FFFF or $ code <0x0 or
77
+ /* Range check deliberately disabled: out-of-range and surrogate code points are no longer replaced with U+FFFD.
78
+ * if($code > 0x10FFFF or $code < 0x0 or
75
79
($code >= 0xD800 and $code <= 0xDFFF) ) {
76
80
// bits are set outside the "valid" range as defined
77
81
// by UNICODE 4.1.0
78
82
return "\xEF\xBF\xBD";
79
- }
83
+ }*/
80
84
81
85
$ x =$ y =$ z =$ w =0 ;
82
86
if ($ code <0x80 ) {