55"""
66
77import sys
8- import os
98import traceback
109from optparse import OptionParser
1110
1514from html5lib import constants
1615from html5lib import utils
1716
17+
1818def parse ():
1919optParser = getOptParser ()
20- opts ,args = optParser .parse_args ()
20+ opts ,args = optParser .parse_args ()
2121encoding = "utf8"
2222
2323try :
2424f = args [- 1 ]
2525# Try opening from the internet
2626if f .startswith ('http://' ):
2727try :
28- import urllib .request ,urllib .parse ,urllib .error ,cgi
28+ import urllib .request
29+ import urllib .parse
30+ import urllib .error
31+ import cgi
2932f = urllib .request .urlopen (f )
3033contentType = f .headers .get ('content-type' )
3134if contentType :
@@ -41,7 +44,7 @@ def parse():
4144try :
4245# Try opening from file system
4346f = open (f ,"rb" )
44- except IOError as e :
47+ except IOError as e :
4548sys .stderr .write ("Unable to open file: %s\n " % e )
4649sys .exit (1 )
4750except IndexError :
@@ -82,14 +85,15 @@ def parse():
8285if document :
8386printOutput (p ,document ,opts )
8487t2 = time .time ()
85- sys .stderr .write ("\n \n Run took: %fs (plus %fs to print the output)" % (t1 - t0 ,t2 - t1 ))
88+ sys .stderr .write ("\n \n Run took: %fs (plus %fs to print the output)" % (t1 - t0 ,t2 - t1 ))
8689else :
87- sys .stderr .write ("\n \n Run took: %fs" % (t1 - t0 ))
90+ sys .stderr .write ("\n \n Run took: %fs" % (t1 - t0 ))
8891else :
8992document = run (parseMethod ,f ,encoding ,opts .scripting )
9093if document :
9194printOutput (p ,document ,opts )
9295
96+
9397def run (parseMethod ,f ,encoding ,scripting ):
9498try :
9599document = parseMethod (f ,encoding = encoding ,scripting = scripting )
@@ -98,6 +102,7 @@ def run(parseMethod, f, encoding, scripting):
98102traceback .print_exc ()
99103return document
100104
105+
101106def printOutput (parser ,document ,opts ):
102107if opts .encoding :
103108print ("Encoding:" ,parser .tokenizer .stream .charEncoding )
@@ -116,7 +121,7 @@ def printOutput(parser, document, opts):
116121elif tb == "etree" :
117122sys .stdout .write (utils .default_etree .tostring (document ))
118123elif opts .tree :
119- if not hasattr (document ,'__getitem__' ):
124+ if not hasattr (document ,'__getitem__' ):
120125document = [document ]
121126for fragment in document :
122127print (parser .tree .testSerializer (fragment ))
@@ -126,7 +131,7 @@ def printOutput(parser, document, opts):
126131kwargs = {}
127132for opt in serializer .HTMLSerializer .options :
128133try :
129- kwargs [opt ]= getattr (opts ,opt )
134+ kwargs [opt ]= getattr (opts ,opt )
130135except :
131136pass
132137if not kwargs ['quote_char' ]:
@@ -142,12 +147,14 @@ def printOutput(parser, document, opts):
142147encoding = "utf-8"
143148for text in serializer .HTMLSerializer (** kwargs ).serialize (tokens ,encoding = encoding ):
144149sys .stdout .write (text )
145- if not text .endswith ('\n ' ):sys .stdout .write ('\n ' )
150+ if not text .endswith ('\n ' ):
151+ sys .stdout .write ('\n ' )
146152if opts .error :
147- errList = []
153+ errList = []
148154for pos ,errorcode ,datavars in parser .errors :
149- errList .append ("Line %i Col %i" % pos + " " + constants .E .get (errorcode ,'Unknown error "%s"' % errorcode )% datavars )
150- sys .stdout .write ("\n Parse errors:\n " + "\n " .join (errList )+ "\n " )
155+ errList .append ("Line %i Col %i" % pos + " " + constants .E .get (errorcode ,'Unknown error "%s"' % errorcode )% datavars )
156+ sys .stdout .write ("\n Parse errors:\n " + "\n " .join (errList )+ "\n " )
157+
151158
152159def getOptParser ():
153160parser = OptionParser (usage = __doc__ )