@@ -41,6 +41,29 @@ a treebuilder:
4141   with open("mydocument.html", "rb") as f:
4242       lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
4343
44+ When used with ``urllib2`` (Python 2), the charset from HTTP should be
45+ passed into html5lib as follows:
46+
47+ .. code-block:: python
48+
49+   from contextlib import closing
50+   from urllib2 import urlopen
51+   import html5lib
52+
53+   with closing(urlopen("http://example.com/")) as f:
54+       document = html5lib.parse(f, encoding=f.info().getparam("charset"))
55+
56+ When used with ``urllib.request`` (Python 3), the charset from HTTP
57+ should be passed into html5lib as follows:
58+
59+ .. code-block:: python
60+
61+   from urllib.request import urlopen
62+   import html5lib
63+
64+   with urlopen("http://example.com/") as f:
65+       document = html5lib.parse(f, encoding=f.info().get_content_charset())
66+
4467 To have more control over the parser, create a parser object explicitly.
4568For instance, to make the parser raise exceptions on parse errors, use:
4669