Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c2de86f

Browse files
committed
Merge remote-tracking branch 'origin/master' into fix_tokenizer_201411
2 parents dda96f8 + 93ee3b3, commit c2de86f

21 files changed

+138
-474
lines changed

‎.pytest.expect

-3.55 KB
Binary file not shown.

‎CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@ Released on XXX
2222

2323
* Move testsuite to ``py.test``.
2424

25+
* Fix #124: move to webencodings for decoding the input byte stream;
26+
this makes html5lib compliant with the Encoding Standard, and
27+
introduces a required dependency on webencodings.
28+
2529

2630
0.9999999/1.0b8
2731
~~~~~~~~~~~~~~~

‎html5lib/constants.py

Lines changed: 0 additions & 229 deletions
Original file line number | Diff line number | Diff line change
@@ -2846,235 +2846,6 @@
28462846
0x9F:"\u0178",
28472847
}
28482848

2849-
encodings= {
2850-
'437':'cp437',
2851-
'850':'cp850',
2852-
'852':'cp852',
2853-
'855':'cp855',
2854-
'857':'cp857',
2855-
'860':'cp860',
2856-
'861':'cp861',
2857-
'862':'cp862',
2858-
'863':'cp863',
2859-
'865':'cp865',
2860-
'866':'cp866',
2861-
'869':'cp869',
2862-
'ansix341968':'ascii',
2863-
'ansix341986':'ascii',
2864-
'arabic':'iso8859-6',
2865-
'ascii':'ascii',
2866-
'asmo708':'iso8859-6',
2867-
'big5':'big5',
2868-
'big5hkscs':'big5hkscs',
2869-
'chinese':'gbk',
2870-
'cp037':'cp037',
2871-
'cp1026':'cp1026',
2872-
'cp154':'ptcp154',
2873-
'cp367':'ascii',
2874-
'cp424':'cp424',
2875-
'cp437':'cp437',
2876-
'cp500':'cp500',
2877-
'cp775':'cp775',
2878-
'cp819':'windows-1252',
2879-
'cp850':'cp850',
2880-
'cp852':'cp852',
2881-
'cp855':'cp855',
2882-
'cp857':'cp857',
2883-
'cp860':'cp860',
2884-
'cp861':'cp861',
2885-
'cp862':'cp862',
2886-
'cp863':'cp863',
2887-
'cp864':'cp864',
2888-
'cp865':'cp865',
2889-
'cp866':'cp866',
2890-
'cp869':'cp869',
2891-
'cp936':'gbk',
2892-
'cpgr':'cp869',
2893-
'cpis':'cp861',
2894-
'csascii':'ascii',
2895-
'csbig5':'big5',
2896-
'cseuckr':'cp949',
2897-
'cseucpkdfmtjapanese':'euc_jp',
2898-
'csgb2312':'gbk',
2899-
'cshproman8':'hp-roman8',
2900-
'csibm037':'cp037',
2901-
'csibm1026':'cp1026',
2902-
'csibm424':'cp424',
2903-
'csibm500':'cp500',
2904-
'csibm855':'cp855',
2905-
'csibm857':'cp857',
2906-
'csibm860':'cp860',
2907-
'csibm861':'cp861',
2908-
'csibm863':'cp863',
2909-
'csibm864':'cp864',
2910-
'csibm865':'cp865',
2911-
'csibm866':'cp866',
2912-
'csibm869':'cp869',
2913-
'csiso2022jp':'iso2022_jp',
2914-
'csiso2022jp2':'iso2022_jp_2',
2915-
'csiso2022kr':'iso2022_kr',
2916-
'csiso58gb231280':'gbk',
2917-
'csisolatin1':'windows-1252',
2918-
'csisolatin2':'iso8859-2',
2919-
'csisolatin3':'iso8859-3',
2920-
'csisolatin4':'iso8859-4',
2921-
'csisolatin5':'windows-1254',
2922-
'csisolatin6':'iso8859-10',
2923-
'csisolatinarabic':'iso8859-6',
2924-
'csisolatincyrillic':'iso8859-5',
2925-
'csisolatingreek':'iso8859-7',
2926-
'csisolatinhebrew':'iso8859-8',
2927-
'cskoi8r':'koi8-r',
2928-
'csksc56011987':'cp949',
2929-
'cspc775baltic':'cp775',
2930-
'cspc850multilingual':'cp850',
2931-
'cspc862latinhebrew':'cp862',
2932-
'cspc8codepage437':'cp437',
2933-
'cspcp852':'cp852',
2934-
'csptcp154':'ptcp154',
2935-
'csshiftjis':'shift_jis',
2936-
'csunicode11utf7':'utf-7',
2937-
'cyrillic':'iso8859-5',
2938-
'cyrillicasian':'ptcp154',
2939-
'ebcdiccpbe':'cp500',
2940-
'ebcdiccpca':'cp037',
2941-
'ebcdiccpch':'cp500',
2942-
'ebcdiccphe':'cp424',
2943-
'ebcdiccpnl':'cp037',
2944-
'ebcdiccpus':'cp037',
2945-
'ebcdiccpwt':'cp037',
2946-
'ecma114':'iso8859-6',
2947-
'ecma118':'iso8859-7',
2948-
'elot928':'iso8859-7',
2949-
'eucjp':'euc_jp',
2950-
'euckr':'cp949',
2951-
'extendedunixcodepackedformatforjapanese':'euc_jp',
2952-
'gb18030':'gb18030',
2953-
'gb2312':'gbk',
2954-
'gb231280':'gbk',
2955-
'gbk':'gbk',
2956-
'greek':'iso8859-7',
2957-
'greek8':'iso8859-7',
2958-
'hebrew':'iso8859-8',
2959-
'hproman8':'hp-roman8',
2960-
'hzgb2312':'hz',
2961-
'ibm037':'cp037',
2962-
'ibm1026':'cp1026',
2963-
'ibm367':'ascii',
2964-
'ibm424':'cp424',
2965-
'ibm437':'cp437',
2966-
'ibm500':'cp500',
2967-
'ibm775':'cp775',
2968-
'ibm819':'windows-1252',
2969-
'ibm850':'cp850',
2970-
'ibm852':'cp852',
2971-
'ibm855':'cp855',
2972-
'ibm857':'cp857',
2973-
'ibm860':'cp860',
2974-
'ibm861':'cp861',
2975-
'ibm862':'cp862',
2976-
'ibm863':'cp863',
2977-
'ibm864':'cp864',
2978-
'ibm865':'cp865',
2979-
'ibm866':'cp866',
2980-
'ibm869':'cp869',
2981-
'iso2022jp':'iso2022_jp',
2982-
'iso2022jp2':'iso2022_jp_2',
2983-
'iso2022kr':'iso2022_kr',
2984-
'iso646irv1991':'ascii',
2985-
'iso646us':'ascii',
2986-
'iso88591':'windows-1252',
2987-
'iso885910':'iso8859-10',
2988-
'iso8859101992':'iso8859-10',
2989-
'iso885911987':'windows-1252',
2990-
'iso885913':'iso8859-13',
2991-
'iso885914':'iso8859-14',
2992-
'iso8859141998':'iso8859-14',
2993-
'iso885915':'iso8859-15',
2994-
'iso885916':'iso8859-16',
2995-
'iso8859162001':'iso8859-16',
2996-
'iso88592':'iso8859-2',
2997-
'iso885921987':'iso8859-2',
2998-
'iso88593':'iso8859-3',
2999-
'iso885931988':'iso8859-3',
3000-
'iso88594':'iso8859-4',
3001-
'iso885941988':'iso8859-4',
3002-
'iso88595':'iso8859-5',
3003-
'iso885951988':'iso8859-5',
3004-
'iso88596':'iso8859-6',
3005-
'iso885961987':'iso8859-6',
3006-
'iso88597':'iso8859-7',
3007-
'iso885971987':'iso8859-7',
3008-
'iso88598':'iso8859-8',
3009-
'iso885981988':'iso8859-8',
3010-
'iso88599':'windows-1254',
3011-
'iso885991989':'windows-1254',
3012-
'isoceltic':'iso8859-14',
3013-
'isoir100':'windows-1252',
3014-
'isoir101':'iso8859-2',
3015-
'isoir109':'iso8859-3',
3016-
'isoir110':'iso8859-4',
3017-
'isoir126':'iso8859-7',
3018-
'isoir127':'iso8859-6',
3019-
'isoir138':'iso8859-8',
3020-
'isoir144':'iso8859-5',
3021-
'isoir148':'windows-1254',
3022-
'isoir149':'cp949',
3023-
'isoir157':'iso8859-10',
3024-
'isoir199':'iso8859-14',
3025-
'isoir226':'iso8859-16',
3026-
'isoir58':'gbk',
3027-
'isoir6':'ascii',
3028-
'koi8r':'koi8-r',
3029-
'koi8u':'koi8-u',
3030-
'korean':'cp949',
3031-
'ksc5601':'cp949',
3032-
'ksc56011987':'cp949',
3033-
'ksc56011989':'cp949',
3034-
'l1':'windows-1252',
3035-
'l10':'iso8859-16',
3036-
'l2':'iso8859-2',
3037-
'l3':'iso8859-3',
3038-
'l4':'iso8859-4',
3039-
'l5':'windows-1254',
3040-
'l6':'iso8859-10',
3041-
'l8':'iso8859-14',
3042-
'latin1':'windows-1252',
3043-
'latin10':'iso8859-16',
3044-
'latin2':'iso8859-2',
3045-
'latin3':'iso8859-3',
3046-
'latin4':'iso8859-4',
3047-
'latin5':'windows-1254',
3048-
'latin6':'iso8859-10',
3049-
'latin8':'iso8859-14',
3050-
'latin9':'iso8859-15',
3051-
'ms936':'gbk',
3052-
'mskanji':'shift_jis',
3053-
'pt154':'ptcp154',
3054-
'ptcp154':'ptcp154',
3055-
'r8':'hp-roman8',
3056-
'roman8':'hp-roman8',
3057-
'shiftjis':'shift_jis',
3058-
'tis620':'cp874',
3059-
'unicode11utf7':'utf-7',
3060-
'us':'ascii',
3061-
'usascii':'ascii',
3062-
'utf16':'utf-16',
3063-
'utf16be':'utf-16-be',
3064-
'utf16le':'utf-16-le',
3065-
'utf8':'utf-8',
3066-
'windows1250':'cp1250',
3067-
'windows1251':'cp1251',
3068-
'windows1252':'cp1252',
3069-
'windows1253':'cp1253',
3070-
'windows1254':'cp1254',
3071-
'windows1255':'cp1255',
3072-
'windows1256':'cp1256',
3073-
'windows1257':'cp1257',
3074-
'windows1258':'cp1258',
3075-
'windows936':'gbk',
3076-
'x-x-big5':'big5'}
3077-
30782849
tokenTypes= {
30792850
"Doctype":0,
30802851
"Characters":1,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp