Commit 8238648

committed

Fix all the files outside of html5lib to flake8 cleanly

1 parent: 2c3b64b · commit: 8238648 (copy full SHA for 8238648)

File tree

5 files changed

+80

-58

lines changed

5 files changed

+80

-58

lines changed

`‎flake8-run.sh‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -5,5 +5,5 @@ if [[ ! -x $(which flake8) ]]; then`
`5`	`5`	`exit 1`
`6`	`6`	`fi`
`7`	`7`
`8`		`-flake8 html5lib`
	`8`	+flake8 `dirname $0`
`9`	`9`	`exit $?`

`‎parse.py‎`

Lines changed: 19 additions & 12 deletions

Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,6 @@`
`5`	`5`	`"""`
`6`	`6`
`7`	`7`	`importsys`
`8`		`-importos`
`9`	`8`	`importtraceback`
`10`	`9`	`fromoptparseimportOptionParser`
`11`	`10`
`@@ -15,17 +14,21 @@`
`15`	`14`	`fromhtml5libimportconstants`
`16`	`15`	`fromhtml5libimportutils`
`17`	`16`
	`17`	`+`
`18`	`18`	`defparse():`
`19`	`19`	`optParser=getOptParser()`
`20`		`-opts,args=optParser.parse_args()`
	`20`	`+opts,args=optParser.parse_args()`
`21`	`21`	`encoding="utf8"`
`22`	`22`
`23`	`23`	`try:`
`24`	`24`	`f=args[-1]`
`25`	`25`	`# Try opening from the internet`
`26`	`26`	`iff.startswith('http://'):`
`27`	`27`	`try:`
`28`		`-importurllib.request,urllib.parse,urllib.error,cgi`
	`28`	`+importurllib.request`
	`29`	`+importurllib.parse`
	`30`	`+importurllib.error`
	`31`	`+importcgi`
`29`	`32`	`f=urllib.request.urlopen(f)`
`30`	`33`	`contentType=f.headers.get('content-type')`
`31`	`34`	`ifcontentType:`
`@@ -41,7 +44,7 @@ def parse():`
`41`	`44`	`try:`
`42`	`45`	`# Try opening from file system`
`43`	`46`	`f=open(f,"rb")`
`44`		`-exceptIOErrorase:`
	`47`	`+exceptIOErrorase:`
`45`	`48`	`sys.stderr.write("Unable to open file: %s\n"%e)`
`46`	`49`	`sys.exit(1)`
`47`	`50`	`exceptIndexError:`
`@@ -82,14 +85,15 @@ def parse():`
`82`	`85`	`ifdocument:`
`83`	`86`	`printOutput(p,document,opts)`
`84`	`87`	`t2=time.time()`
`85`		`-sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0,t2-t1))`
	`88`	`+sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0,t2-t1))`
`86`	`89`	`else:`
`87`		`-sys.stderr.write("\n\nRun took: %fs"%(t1-t0))`
	`90`	`+sys.stderr.write("\n\nRun took: %fs"%(t1-t0))`
`88`	`91`	`else:`
`89`	`92`	`document=run(parseMethod,f,encoding,opts.scripting)`
`90`	`93`	`ifdocument:`
`91`	`94`	`printOutput(p,document,opts)`
`92`	`95`
	`96`	`+`
`93`	`97`	`defrun(parseMethod,f,encoding,scripting):`
`94`	`98`	`try:`
`95`	`99`	`document=parseMethod(f,encoding=encoding,scripting=scripting)`
`@@ -98,6 +102,7 @@ def run(parseMethod, f, encoding, scripting):`
`98`	`102`	`traceback.print_exc()`
`99`	`103`	`returndocument`
`100`	`104`
	`105`	`+`
`101`	`106`	`defprintOutput(parser,document,opts):`
`102`	`107`	`ifopts.encoding:`
`103`	`108`	`print("Encoding:",parser.tokenizer.stream.charEncoding)`
`@@ -116,7 +121,7 @@ def printOutput(parser, document, opts):`
`116`	`121`	`eliftb=="etree":`
`117`	`122`	`sys.stdout.write(utils.default_etree.tostring(document))`
`118`	`123`	`elifopts.tree:`
`119`		`-ifnothasattr(document,'__getitem__'):`
	`124`	`+ifnothasattr(document,'__getitem__'):`
`120`	`125`	`document= [document]`
`121`	`126`	`forfragmentindocument:`
`122`	`127`	`print(parser.tree.testSerializer(fragment))`
`@@ -126,7 +131,7 @@ def printOutput(parser, document, opts):`
`126`	`131`	`kwargs= {}`
`127`	`132`	`foroptinserializer.HTMLSerializer.options:`
`128`	`133`	`try:`
`129`		`-kwargs[opt]=getattr(opts,opt)`
	`134`	`+kwargs[opt]=getattr(opts,opt)`
`130`	`135`	`except:`
`131`	`136`	`pass`
`132`	`137`	`ifnotkwargs['quote_char']:`
`@@ -142,12 +147,14 @@ def printOutput(parser, document, opts):`
`142`	`147`	`encoding="utf-8"`
`143`	`148`	`fortextinserializer.HTMLSerializer(**kwargs).serialize(tokens,encoding=encoding):`
`144`	`149`	`sys.stdout.write(text)`
`145`		`-ifnottext.endswith('\n'):sys.stdout.write('\n')`
	`150`	`+ifnottext.endswith('\n'):`
	`151`	`+sys.stdout.write('\n')`
`146`	`152`	`ifopts.error:`
`147`		`-errList=[]`
	`153`	`+errList=[]`
`148`	`154`	`forpos,errorcode,datavarsinparser.errors:`
`149`		`-errList.append("Line %i Col %i"%pos+" "+constants.E.get(errorcode,'Unknown error "%s"'%errorcode)%datavars)`
`150`		`-sys.stdout.write("\nParse errors:\n"+"\n".join(errList)+"\n")`
	`155`	`+errList.append("Line %i Col %i"%pos+" "+constants.E.get(errorcode,'Unknown error "%s"'%errorcode)%datavars)`
	`156`	`+sys.stdout.write("\nParse errors:\n"+"\n".join(errList)+"\n")`
	`157`	`+`
`151`	`158`
`152`	`159`	`defgetOptParser():`
`153`	`160`	`parser=OptionParser(usage=__doc__)`

`‎setup.py‎`

Lines changed: 6 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@`
`5`	`5`	`fromsetuptoolsimportsetup`
`6`	`6`
`7`	`7`
`8`		`-classifiers=[`
	`8`	`+classifiers=[`
`9`	`9`	`'Development Status :: 5 - Production/Stable',`
`10`	`10`	`'Intended Audience :: Developers',`
`11`	`11`	`'License :: OSI Approved :: MIT License',`
`@@ -20,9 +20,9 @@`
`20`	`20`	`'Programming Language :: Python :: 3.5',`
`21`	`21`	`'Topic :: Software Development :: Libraries :: Python Modules',`
`22`	`22`	`'Topic :: Text Processing :: Markup :: HTML'`
`23`		`-]`
	`23`	`+]`
`24`	`24`
`25`		`-packages= ['html5lib']+ ['html5lib.'+name`
	`25`	`+packages= ['html5lib']+ ['html5lib.'+name`
`26`	`26`	`fornameinos.listdir(os.path.join('html5lib'))`
`27`	`27`	`ifos.path.isdir(os.path.join('html5lib',name))and`
`28`	`28`	`notname.startswith('.')andname!='tests']`
`@@ -39,9 +39,9 @@`
`39`	`39`	`assignments=filter(lambdax:isinstance(x,ast.Assign),t.body)`
`40`	`40`	`forainassignments:`
`41`	`41`	`if (len(a.targets)==1and`
`42`		`-isinstance(a.targets[0],ast.Name)and`
`43`		`-a.targets[0].id=="__version__"and`
`44`		`-isinstance(a.value,ast.Str)):`
	`42`	`+isinstance(a.targets[0],ast.Name)and`
	`43`	`+a.targets[0].id=="__version__"and`
	`44`	`+isinstance(a.value,ast.Str)):`
`45`	`45`	`version=a.value.s`
`46`	`46`
`47`	`47`	`setup(name='html5lib',`

`‎utils/entities.py‎`

Lines changed: 31 additions & 19 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,57 +2,67 @@`
`2`	`2`
`3`	`3`	`importhtml5lib`
`4`	`4`
	`5`	`+`
`5`	`6`	`defparse(path="html5ents.xml"):`
`6`	`7`	`returnhtml5lib.parse(open(path),treebuilder="lxml")`
`7`	`8`
	`9`	`+`
`8`	`10`	`defentity_table(tree):`
`9`	`11`	`returndict((entity_name("".join(tr[0].xpath(".//text()"))),`
`10`	`12`	`entity_characters(tr[1].text))`
`11`	`13`	`fortrintree.xpath("//h:tbody/h:tr",`
`12`		`-namespaces={"h":"http://www.w3.org/1999/xhtml"}))`
	`14`	`+namespaces={"h":"http://www.w3.org/1999/xhtml"}))`
	`15`	`+`
`13`	`16`
`14`	`17`	`defentity_name(inp):`
`15`	`18`	`returninp.strip()`
`16`	`19`
	`20`	`+`
`17`	`21`	`defentity_characters(inp):`
`18`	`22`	`return"".join(codepoint_to_character(item)`
`19`		`-foritemininp.split()`
`20`		`-ifitem)`
	`23`	`+foritemininp.split()`
	`24`	`+ifitem)`
	`25`	`+`
`21`	`26`
`22`	`27`	`defcodepoint_to_character(inp):`
`23`		`-return ("\U000"+inp[2:]).decode("unicode-escape")`
	`28`	`+return ("\\U000"+inp[2:]).decode("unicode-escape")`
	`29`	`+`
`24`	`30`
`25`	`31`	`defmake_tests_json(entities):`
`26`	`32`	`test_list=make_test_list(entities)`
`27`	`33`	`tests_json= {"tests":`
`28`		`-[make_test(*item)foritemintest_list]`
	`34`	`+ [make_test(*item)foritemintest_list]`
`29`	`35`	`}`
`30`	`36`	`returntests_json`
`31`	`37`
	`38`	`+`
`32`	`39`	`defmake_test(name,characters,good):`
`33`	`40`	`return {`
`34`		`-"description":test_description(name,good),`
`35`		`-"input":"&%s"%name,`
`36`		`-"output":test_expected(name,characters,good)`
`37`		`- }`
	`41`	`+"description":test_description(name,good),`
	`42`	`+"input":"&%s"%name,`
	`43`	`+"output":test_expected(name,characters,good)`
	`44`	`+ }`
	`45`	`+`
`38`	`46`
`39`	`47`	`deftest_description(name,good):`
`40`	`48`	`with_semicolon=name.endswith(";")`
`41`		`-semicolon_text= {True:"with a semi-colon",`
`42`		`-False:"without a semi-colon"}[with_semicolon]`
	`49`	`+semicolon_text= {True:"with a semi-colon",`
	`50`	`+False:"without a semi-colon"}[with_semicolon]`
`43`	`51`	`ifgood:`
`44`		`-text="Named entity: %s %s"%(name,semicolon_text)`
	`52`	`+text="Named entity: %s %s"%(name,semicolon_text)`
`45`	`53`	`else:`
`46`		`-text="Bad named entity: %s %s"%(name,semicolon_text)`
	`54`	`+text="Bad named entity: %s %s"%(name,semicolon_text)`
`47`	`55`	`returntext`
`48`	`56`
	`57`	`+`
`49`	`58`	`deftest_expected(name,characters,good):`
`50`	`59`	`rv= []`
`51`	`60`	`ifnotgoodornotname.endswith(";"):`
`52`	`61`	`rv.append("ParseError")`
`53`	`62`	`rv.append(["Character",characters])`
`54`	`63`	`returnrv`
`55`	`64`
	`65`	`+`
`56`	`66`	`defmake_test_list(entities):`
`57`	`67`	`tests= []`
`58`	`68`	`forentity_name,charactersinentities.items():`
`@@ -61,20 +71,23 @@ def make_test_list(entities):`
`61`	`71`	`tests.append((entity_name,characters,True))`
`62`	`72`	`returnsorted(tests)`
`63`	`73`
	`74`	`+`
`64`	`75`	`defsubentity_exists(entity_name,entities):`
`65`	`76`	`foriinrange(1,len(entity_name)):`
`66`	`77`	`ifentity_name[:-i]inentities:`
`67`	`78`	`returnTrue`
`68`	`79`	`returnFalse`
`69`	`80`
	`81`	`+`
`70`	`82`	`defmake_entities_code(entities):`
`71`		`-entities_text="\n".join("\"%s\": u\"%s\","%(`
`72`		`-name,entities[name].encode(`
`73`		`-"unicode-escape").replace("\"","\\\""))`
`74`		`-fornameinsorted(entities.keys()))`
	`83`	`+entities_text="\n".join("\"%s\": u\"%s\","%(`
	`84`	`+name,entities[name].encode(`
	`85`	`+"unicode-escape").replace("\"","\\\""))`
	`86`	`+fornameinsorted(entities.keys()))`
`75`	`87`	`return"""entities = {`
`76`	`88`	`%s`
`77`		`-}"""%entities_text`
	`89`	`+}"""%entities_text`
	`90`	`+`
`78`	`91`
`79`	`92`	`defmain():`
`80`	`93`	`entities=entity_table(parse())`
`@@ -85,4 +98,3 @@ def main():`
`85`	`98`
`86`	`99`	`if__name__=="__main__":`
`87`	`100`	`main()`
`88`		`-`

`‎utils/spider.py‎`

Lines changed: 23 additions & 20 deletions

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,9 @@`
`7`	`7`	`s.spider("http://www.google.com", maxURLs=100)`
`8`	`8`	`"""`
`9`	`9`
`10`		`-importurllib.request,urllib.error,urllib.parse`
	`10`	`+importurllib.request`
	`11`	`+importurllib.error`
	`12`	`+importurllib.parse`
`11`	`13`	`importurllib.robotparser`
`12`	`14`	`importmd5`
`13`	`15`
`@@ -16,11 +18,13 @@`
`16`	`18`	`importhtml5lib`
`17`	`19`	`fromhtml5lib.treebuildersimportetree`
`18`	`20`
	`21`	`+`
`19`	`22`	`classSpider(object):`
	`23`	`+`
`20`	`24`	`def__init__(self):`
`21`	`25`	`self.unvisitedURLs=set()`
`22`	`26`	`self.visitedURLs=set()`
`23`		`-self.buggyURLs=set()`
	`27`	`+self.buggyURLs=set()`
`24`	`28`	`self.robotParser=urllib.robotparser.RobotFileParser()`
`25`	`29`	`self.contentDigest= {}`
`26`	`30`	`self.http=httplib2.Http(".cache")`
`@@ -70,18 +74,18 @@ def updateURLs(self, tree):`
`70`	`74`	`update the list of visited and unvisited URLs according to whether we`
`71`	`75`	`have seen them before or not"""`
`72`	`76`	`urls=set()`
`73`		`-#Remove all links we have already visited`
	`77`	`+#Remove all links we have already visited`
`74`	`78`	`forlinkintree.findall(".//a"):`
`75`		`-try:`
`76`		`-url=urllib.parse.urldefrag(link.attrib['href'])[0]`
`77`		`-if (urlandurlnotinself.unvisitedURLsandurl`
	`79`	`+try:`
	`80`	`+url=urllib.parse.urldefrag(link.attrib['href'])[0]`
	`81`	`+if (urlandurlnotinself.unvisitedURLsandurl`
`78`	`82`	`notinself.visitedURLs):`
`79`		`-urls.add(url)`
`80`		`-exceptKeyError:`
`81`		`-pass`
	`83`	`+urls.add(url)`
	`84`	`+exceptKeyError:`
	`85`	`+pass`
`82`	`86`
`83`		`-#Remove all non-http URLs and add a suitable base URL where that is`
`84`		`-#missing`
	`87`	`+#Remove all non-http URLs and add a suitable base URL where that is`
	`88`	`+#missing`
`85`	`89`	`newUrls=set()`
`86`	`90`	`forurlinurls:`
`87`	`91`	`splitURL=list(urllib.parse.urlsplit(url))`
`@@ -93,23 +97,22 @@ def updateURLs(self, tree):`
`93`	`97`	`urls=newUrls`
`94`	`98`
`95`	`99`	`responseHeaders= {}`
`96`		`-#Now we want to find the content types of the links we haven't visited`
	`100`	`+#Now we want to find the content types of the links we haven't visited`
`97`	`101`	`forurlinurls:`
`98`	`102`	`try:`
`99`	`103`	`resp,content=self.http.request(url,"HEAD")`
`100`	`104`	`responseHeaders[url]=resp`
`101`		`-exceptAttributeErrorasKeyError:`
`102`		`-#Don't know why this happens`
	`105`	`+exceptAttributeError:`
	`106`	`+#Don't know why this happens`
`103`	`107`	`pass`
`104`	`108`
`105`		`-`
`106`		`-#Remove links not of content-type html or pages not found`
`107`		`-#XXX - need to deal with other status codes?`
	`109`	`+# Remove links not of content-type html or pages not found`
	`110`	`+# XXX - need to deal with other status codes?`
`108`	`111`	`toVisit=set([urlforurlinurlsifurlinresponseHeadersand`
`109`		`-"html"inresponseHeaders[url]['content-type']and`
`110`		`-responseHeaders[url]['status']=="200"])`
	`112`	`+"html"inresponseHeaders[url]['content-type']and`
	`113`	`+responseHeaders[url]['status']=="200"])`
`111`	`114`
`112`		`-#Now check we are allowed to spider the page`
	`115`	`+#Now check we are allowed to spider the page`
`113`	`116`	`forurlintoVisit:`
`114`	`117`	`robotURL=list(urllib.parse.urlsplit(url)[:2])`
`115`	`118`	`robotURL.extend(["robots.txt","",""])`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 8238648

File tree

5 files changed

5 files changed

`‎flake8-run.sh‎`

`‎parse.py‎`

`‎setup.py‎`

`‎utils/entities.py‎`

`‎utils/spider.py‎`

0 commit comments