Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 93ee3b3

Browse files
committed
Merge pull request #137 from gsnedders/webencodings
Fix #124: Move to webencodings for decoding the input byte stream.
2 parents 44b0bbc + 85723e2 commit 93ee3b3

File tree

11 files changed

+49
-306
lines changed

11 files changed

+49
-306
lines changed

‎.pytest.expect‎

-3.55 KB
Binary file not shown.

‎CHANGES.rst‎

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@ Released on XXX
2222

2323
* Move testsuite to ``py.test``.
2424

25+
* Fix #124: move to webencodings for decoding the input byte stream;
26+
this makes html5lib compliant with the Encoding Standard, and
27+
introduces a required dependency on webencodings.
28+
2529

2630
0.9999999/1.0b8
2731
~~~~~~~~~~~~~~~

‎html5lib/constants.py‎

Lines changed: 0 additions & 229 deletions
Original file line number | Diff line number | Diff line change
@@ -2846,235 +2846,6 @@
28462846
0x9F:"\u0178",
28472847
}
28482848

2849-
encodings= {
2850-
'437':'cp437',
2851-
'850':'cp850',
2852-
'852':'cp852',
2853-
'855':'cp855',
2854-
'857':'cp857',
2855-
'860':'cp860',
2856-
'861':'cp861',
2857-
'862':'cp862',
2858-
'863':'cp863',
2859-
'865':'cp865',
2860-
'866':'cp866',
2861-
'869':'cp869',
2862-
'ansix341968':'ascii',
2863-
'ansix341986':'ascii',
2864-
'arabic':'iso8859-6',
2865-
'ascii':'ascii',
2866-
'asmo708':'iso8859-6',
2867-
'big5':'big5',
2868-
'big5hkscs':'big5hkscs',
2869-
'chinese':'gbk',
2870-
'cp037':'cp037',
2871-
'cp1026':'cp1026',
2872-
'cp154':'ptcp154',
2873-
'cp367':'ascii',
2874-
'cp424':'cp424',
2875-
'cp437':'cp437',
2876-
'cp500':'cp500',
2877-
'cp775':'cp775',
2878-
'cp819':'windows-1252',
2879-
'cp850':'cp850',
2880-
'cp852':'cp852',
2881-
'cp855':'cp855',
2882-
'cp857':'cp857',
2883-
'cp860':'cp860',
2884-
'cp861':'cp861',
2885-
'cp862':'cp862',
2886-
'cp863':'cp863',
2887-
'cp864':'cp864',
2888-
'cp865':'cp865',
2889-
'cp866':'cp866',
2890-
'cp869':'cp869',
2891-
'cp936':'gbk',
2892-
'cpgr':'cp869',
2893-
'cpis':'cp861',
2894-
'csascii':'ascii',
2895-
'csbig5':'big5',
2896-
'cseuckr':'cp949',
2897-
'cseucpkdfmtjapanese':'euc_jp',
2898-
'csgb2312':'gbk',
2899-
'cshproman8':'hp-roman8',
2900-
'csibm037':'cp037',
2901-
'csibm1026':'cp1026',
2902-
'csibm424':'cp424',
2903-
'csibm500':'cp500',
2904-
'csibm855':'cp855',
2905-
'csibm857':'cp857',
2906-
'csibm860':'cp860',
2907-
'csibm861':'cp861',
2908-
'csibm863':'cp863',
2909-
'csibm864':'cp864',
2910-
'csibm865':'cp865',
2911-
'csibm866':'cp866',
2912-
'csibm869':'cp869',
2913-
'csiso2022jp':'iso2022_jp',
2914-
'csiso2022jp2':'iso2022_jp_2',
2915-
'csiso2022kr':'iso2022_kr',
2916-
'csiso58gb231280':'gbk',
2917-
'csisolatin1':'windows-1252',
2918-
'csisolatin2':'iso8859-2',
2919-
'csisolatin3':'iso8859-3',
2920-
'csisolatin4':'iso8859-4',
2921-
'csisolatin5':'windows-1254',
2922-
'csisolatin6':'iso8859-10',
2923-
'csisolatinarabic':'iso8859-6',
2924-
'csisolatincyrillic':'iso8859-5',
2925-
'csisolatingreek':'iso8859-7',
2926-
'csisolatinhebrew':'iso8859-8',
2927-
'cskoi8r':'koi8-r',
2928-
'csksc56011987':'cp949',
2929-
'cspc775baltic':'cp775',
2930-
'cspc850multilingual':'cp850',
2931-
'cspc862latinhebrew':'cp862',
2932-
'cspc8codepage437':'cp437',
2933-
'cspcp852':'cp852',
2934-
'csptcp154':'ptcp154',
2935-
'csshiftjis':'shift_jis',
2936-
'csunicode11utf7':'utf-7',
2937-
'cyrillic':'iso8859-5',
2938-
'cyrillicasian':'ptcp154',
2939-
'ebcdiccpbe':'cp500',
2940-
'ebcdiccpca':'cp037',
2941-
'ebcdiccpch':'cp500',
2942-
'ebcdiccphe':'cp424',
2943-
'ebcdiccpnl':'cp037',
2944-
'ebcdiccpus':'cp037',
2945-
'ebcdiccpwt':'cp037',
2946-
'ecma114':'iso8859-6',
2947-
'ecma118':'iso8859-7',
2948-
'elot928':'iso8859-7',
2949-
'eucjp':'euc_jp',
2950-
'euckr':'cp949',
2951-
'extendedunixcodepackedformatforjapanese':'euc_jp',
2952-
'gb18030':'gb18030',
2953-
'gb2312':'gbk',
2954-
'gb231280':'gbk',
2955-
'gbk':'gbk',
2956-
'greek':'iso8859-7',
2957-
'greek8':'iso8859-7',
2958-
'hebrew':'iso8859-8',
2959-
'hproman8':'hp-roman8',
2960-
'hzgb2312':'hz',
2961-
'ibm037':'cp037',
2962-
'ibm1026':'cp1026',
2963-
'ibm367':'ascii',
2964-
'ibm424':'cp424',
2965-
'ibm437':'cp437',
2966-
'ibm500':'cp500',
2967-
'ibm775':'cp775',
2968-
'ibm819':'windows-1252',
2969-
'ibm850':'cp850',
2970-
'ibm852':'cp852',
2971-
'ibm855':'cp855',
2972-
'ibm857':'cp857',
2973-
'ibm860':'cp860',
2974-
'ibm861':'cp861',
2975-
'ibm862':'cp862',
2976-
'ibm863':'cp863',
2977-
'ibm864':'cp864',
2978-
'ibm865':'cp865',
2979-
'ibm866':'cp866',
2980-
'ibm869':'cp869',
2981-
'iso2022jp':'iso2022_jp',
2982-
'iso2022jp2':'iso2022_jp_2',
2983-
'iso2022kr':'iso2022_kr',
2984-
'iso646irv1991':'ascii',
2985-
'iso646us':'ascii',
2986-
'iso88591':'windows-1252',
2987-
'iso885910':'iso8859-10',
2988-
'iso8859101992':'iso8859-10',
2989-
'iso885911987':'windows-1252',
2990-
'iso885913':'iso8859-13',
2991-
'iso885914':'iso8859-14',
2992-
'iso8859141998':'iso8859-14',
2993-
'iso885915':'iso8859-15',
2994-
'iso885916':'iso8859-16',
2995-
'iso8859162001':'iso8859-16',
2996-
'iso88592':'iso8859-2',
2997-
'iso885921987':'iso8859-2',
2998-
'iso88593':'iso8859-3',
2999-
'iso885931988':'iso8859-3',
3000-
'iso88594':'iso8859-4',
3001-
'iso885941988':'iso8859-4',
3002-
'iso88595':'iso8859-5',
3003-
'iso885951988':'iso8859-5',
3004-
'iso88596':'iso8859-6',
3005-
'iso885961987':'iso8859-6',
3006-
'iso88597':'iso8859-7',
3007-
'iso885971987':'iso8859-7',
3008-
'iso88598':'iso8859-8',
3009-
'iso885981988':'iso8859-8',
3010-
'iso88599':'windows-1254',
3011-
'iso885991989':'windows-1254',
3012-
'isoceltic':'iso8859-14',
3013-
'isoir100':'windows-1252',
3014-
'isoir101':'iso8859-2',
3015-
'isoir109':'iso8859-3',
3016-
'isoir110':'iso8859-4',
3017-
'isoir126':'iso8859-7',
3018-
'isoir127':'iso8859-6',
3019-
'isoir138':'iso8859-8',
3020-
'isoir144':'iso8859-5',
3021-
'isoir148':'windows-1254',
3022-
'isoir149':'cp949',
3023-
'isoir157':'iso8859-10',
3024-
'isoir199':'iso8859-14',
3025-
'isoir226':'iso8859-16',
3026-
'isoir58':'gbk',
3027-
'isoir6':'ascii',
3028-
'koi8r':'koi8-r',
3029-
'koi8u':'koi8-u',
3030-
'korean':'cp949',
3031-
'ksc5601':'cp949',
3032-
'ksc56011987':'cp949',
3033-
'ksc56011989':'cp949',
3034-
'l1':'windows-1252',
3035-
'l10':'iso8859-16',
3036-
'l2':'iso8859-2',
3037-
'l3':'iso8859-3',
3038-
'l4':'iso8859-4',
3039-
'l5':'windows-1254',
3040-
'l6':'iso8859-10',
3041-
'l8':'iso8859-14',
3042-
'latin1':'windows-1252',
3043-
'latin10':'iso8859-16',
3044-
'latin2':'iso8859-2',
3045-
'latin3':'iso8859-3',
3046-
'latin4':'iso8859-4',
3047-
'latin5':'windows-1254',
3048-
'latin6':'iso8859-10',
3049-
'latin8':'iso8859-14',
3050-
'latin9':'iso8859-15',
3051-
'ms936':'gbk',
3052-
'mskanji':'shift_jis',
3053-
'pt154':'ptcp154',
3054-
'ptcp154':'ptcp154',
3055-
'r8':'hp-roman8',
3056-
'roman8':'hp-roman8',
3057-
'shiftjis':'shift_jis',
3058-
'tis620':'cp874',
3059-
'unicode11utf7':'utf-7',
3060-
'us':'ascii',
3061-
'usascii':'ascii',
3062-
'utf16':'utf-16',
3063-
'utf16be':'utf-16-be',
3064-
'utf16le':'utf-16-le',
3065-
'utf8':'utf-8',
3066-
'windows1250':'cp1250',
3067-
'windows1251':'cp1251',
3068-
'windows1252':'cp1252',
3069-
'windows1253':'cp1253',
3070-
'windows1254':'cp1254',
3071-
'windows1255':'cp1255',
3072-
'windows1256':'cp1256',
3073-
'windows1257':'cp1257',
3074-
'windows1258':'cp1258',
3075-
'windows936':'gbk',
3076-
'x-x-big5':'big5'}
3077-
30782849
tokenTypes= {
30792850
"Doctype":0,
30802851
"Characters":1,

‎html5lib/html5parser.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ def documentEncoding(self):
139139
"""
140140
if not hasattr(self, 'tokenizer'):
141141
return None
142-
return self.tokenizer.stream.charEncoding[0]
142+
return self.tokenizer.stream.charEncoding[0].name
143143

144144
def isHTMLIntegrationPoint(self, element):
145145
if (element.name == "annotation-xml" and

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp