Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 768ba79

Browse files
committed
More stuff working including treewalkers, parts of parse.py dom, (c)ElementTree
--HG--branch : svgmathmlextra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/branches/svgmathml%401266
1 parent 10b9010 commit 768ba79

31 files changed

+303
-264
lines changed

‎parse.py

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/envpython
1+
#!/usr/bin/envpython3.0
22
"""usage: %prog [options] filename
33
44
Parse a document to a simpletree tree, with optional profiling
@@ -9,11 +9,16 @@
99
importos
1010
fromoptparseimportOptionParser
1111

12+
print(sys.stdout.encoding)
13+
1214
#RELEASE remove
1315
sys.path.insert(0,os.path.abspath(os.path.join(__file__,'../src')))
1416
#END RELEASE
15-
fromhtml5libimporthtml5parser,liberalxmlparser,sanitizer
17+
print(sys.path)
18+
importhtml5lib
19+
importhtml5lib.html5parserashtml5parser
1620
fromhtml5lib.tokenizerimportHTMLTokenizer
21+
fromhtml5libimporttreebuilders
1722
fromhtml5libimporttreebuilders,serializer,treewalkers
1823
fromhtml5libimportconstants
1924

@@ -27,8 +32,8 @@ def parse():
2732
# Try opening from the internet
2833
iff.startswith('http://'):
2934
try:
30-
importurllib,cgi
31-
f=urllib.urlopen(f)
35+
fromurllibimportrequest
36+
f=request.urlopen(f)
3237
contentType=f.headers.get('content-type')
3338
ifcontentType:
3439
(mediaType,params)=cgi.parse_header(contentType)
@@ -39,7 +44,7 @@ def parse():
3944
else:
4045
try:
4146
# Try opening from file system
42-
f=open(f)
47+
f=open(f,"rb")
4348
exceptIOError:pass
4449
exceptIndexError:
4550
sys.stderr.write("No filename provided. Use -h for help\n")
@@ -64,16 +69,16 @@ def parse():
6469

6570
ifopts.profile:
6671
#XXX should import cProfile instead and use that
67-
importhotshot
68-
importhotshot.stats
69-
prof=hotshot.Profile('stats.prof')
70-
prof.runcall(parseMethod,f,encoding=encoding)
72+
try:
73+
importcProfileasprofile
74+
exceptImportError:
75+
importprofile
76+
importpstats
77+
prof=profile.run('parseMethod(f, encoding=encoding)','prof.out')
7178
prof.close()
7279
# XXX - We should use a temp file here
73-
stats=hotshot.stats.load('stats.prof')
74-
stats.strip_dirs()
75-
stats.sort_stats('time')
76-
stats.print_stats()
80+
stats=pstats.stats('prof.out')
81+
stats.strip_dirs().sort_stats('time').print_stats()
7782
elifopts.time:
7883
importtime
7984
t0=time.time()
@@ -88,13 +93,14 @@ def parse():
8893

8994
defprintOutput(parser,document,opts):
9095
ifopts.encoding:
91-
print"Encoding:",parser.tokenizer.stream.charEncoding
96+
print("Encoding:",parser.tokenizer.stream.charEncoding)
9297
ifopts.xml:
9398
sys.stdout.write(document.toxml("utf-8"))
9499
elifopts.tree:
95100
ifnothasattr(document,'__getitem__'):document= [document]
96101
forfragmentindocument:
97-
printparser.tree.testSerializer(fragment).encode("utf-8")
102+
sys.stdout.write(parser.tree.testSerializer(fragment))
103+
sys.stdout.write("\n")
98104
elifopts.hilite:
99105
sys.stdout.write(document.hilite("utf-8"))
100106
elifopts.html:
@@ -103,7 +109,7 @@ def printOutput(parser, document, opts):
103109
kwargs[opt]=getattr(opts,opt)
104110
ifnotkwargs['quote_char']:delkwargs['quote_char']
105111
tokens=treewalkers.getTreeWalker(opts.treebuilder)(document)
106-
fortextinserializer.HTMLSerializer(**kwargs).serialize(tokens,encoding='utf-8'):
112+
fortextinserializer.HTMLSerializer(**kwargs).serialize(tokens):
107113
sys.stdout.write(text)
108114
ifnottext.endswith('\n'):sys.stdout.write('\n')
109115
ifopts.error:

‎src/html5lib/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,9 @@
1010
f = open("my_document.html")
1111
tree = html5lib.parse(f)
1212
"""
13-
print(__path__)
1413

15-
#from .html5parser import HTMLParser, parse
16-
#from treebuilders import getTreeBuilder
14+
from .html5parserimportHTMLParser,parse
15+
from.treebuildersimportgetTreeBuilder
1716

1817
#from .liberalxmlparser import XMLParser, XHTMLParser
1918

‎src/html5lib/filters/formfiller.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,10 @@
44
# See http://www.whatwg.org/specs/web-forms/current-work/#seeding
55
#
66

7-
import_base
7+
from .import_base
88

99
fromhtml5lib.constantsimportspaceCharacters
10-
spaceCharacters=u"".join(spaceCharacters)
10+
spaceCharacters="".join(spaceCharacters)
1111

1212
classSimpleFilter(_base.Filter):
1313
def__init__(self,source,fieldStorage):
@@ -29,13 +29,13 @@ def __iter__(self):
2929
input_checked_index=-1
3030
fori,(n,v)inenumerate(token["data"]):
3131
n=n.lower()
32-
ifn==u"name":
32+
ifn=="name":
3333
field_name=v.strip(spaceCharacters)
34-
elifn==u"type":
34+
elifn=="type":
3535
field_type=v.strip(spaceCharacters)
36-
elifn==u"checked":
36+
elifn=="checked":
3737
input_checked_index=i
38-
elifn==u"value":
38+
elifn=="value":
3939
input_value_index=i
4040

4141
value_list=self.fieldStorage.getlist(field_name)
@@ -45,20 +45,20 @@ def __iter__(self):
4545
else:
4646
value=""
4747

48-
iffield_typein (u"checkbox",u"radio"):
48+
iffield_typein ("checkbox","radio"):
4949
ifvalue_list:
5050
iftoken["data"][input_value_index][1]==value:
5151
ifinput_checked_index<0:
52-
token["data"].append((u"checked",u""))
52+
token["data"].append(("checked",""))
5353
field_indices[field_name]=field_index+1
5454
elifinput_checked_index>=0:
5555
deltoken["data"][input_checked_index]
5656

57-
eliffield_typenotin (u"button",u"submit",u"reset"):
57+
eliffield_typenotin ("button","submit","reset"):
5858
ifinput_value_index>=0:
59-
token["data"][input_value_index]= (u"value",value)
59+
token["data"][input_value_index]= ("value",value)
6060
else:
61-
token["data"].append((u"value",value))
61+
token["data"].append(("value",value))
6262
field_indices[field_name]=field_index+1
6363

6464
field_type=None
@@ -96,7 +96,7 @@ def __iter__(self):
9696
value=""
9797
if (is_select_multipleornotis_selected_option_found)andoption_value==value:
9898
ifoption_selected_index<0:
99-
token["data"].append((u"selected",u""))
99+
token["data"].append(("selected",""))
100100
field_indices[field_name]=field_index+1
101101
is_selected_option_found=True
102102
elifoption_selected_index>=0:

‎src/html5lib/filters/inject_meta_charset.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import_base
1+
from .import_base
22

33
classFilter(_base.Filter):
44
def__init__(self,source,encoding):
@@ -23,7 +23,7 @@ def __iter__(self):
2323
content_index=-1
2424
fori,(name,value)inenumerate(token["data"]):
2525
ifname.lower()=='charset':
26-
token["data"][i]= (u'charset',self.encoding)
26+
token["data"][i]= ('charset',self.encoding)
2727
meta_found=True
2828
break
2929
elifname=='http-equiv'andvalue.lower()=='content-type':
@@ -32,7 +32,7 @@ def __iter__(self):
3232
content_index=i
3333
else:
3434
ifhas_http_equiv_content_typeandcontent_index>=0:
35-
token["data"][content_index]= (u'content',u'text/html; charset=%s'%self.encoding)
35+
token["data"][content_index]= ('content','text/html; charset=%s'%self.encoding)
3636
meta_found=True
3737

3838
eliftoken["name"].lower()=="head"andnotmeta_found:

‎src/html5lib/filters/iso639codes.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -746,4 +746,4 @@ def isValidLangCode(value):
746746
lang,sublang=value.split('-',1)
747747
else:
748748
lang=value
749-
returnisoLang.has_key(unicode.lower(unicode(lang)))
749+
returnstr.lower(str(lang))inisoLang

‎src/html5lib/filters/lint.py

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
fromgettextimportgettext
22
_=gettext
33

4-
import_base
4+
from .import_base
55
fromhtml5lib.constantsimportcdataElements,rcdataElements,voidElements
66

77
fromhtml5lib.constantsimportspaceCharacters
8-
spaceCharacters=u"".join(spaceCharacters)
8+
spaceCharacters="".join(spaceCharacters)
99

1010
classLintError(Exception):pass
1111

@@ -19,22 +19,22 @@ def __iter__(self):
1919
name=token["name"]
2020
ifcontentModelFlag!="PCDATA":
2121
raiseLintError(_("StartTag not in PCDATA content model flag: %s")%name)
22-
ifnotisinstance(name,unicode):
23-
raiseLintError(_(u"Tag name is not a string: %r")%name)
22+
ifnotisinstance(name,str):
23+
raiseLintError(_("Tag name is not a string: %r")%name)
2424
ifnotname:
25-
raiseLintError(_(u"Empty tag name"))
25+
raiseLintError(_("Empty tag name"))
2626
iftype=="StartTag"andnameinvoidElements:
27-
raiseLintError(_(u"Void element reported as StartTag token: %s")%name)
27+
raiseLintError(_("Void element reported as StartTag token: %s")%name)
2828
eliftype=="EmptyTag"andnamenotinvoidElements:
29-
raiseLintError(_(u"Non-void element reported as EmptyTag token: %s")%token["name"])
29+
raiseLintError(_("Non-void element reported as EmptyTag token: %s")%token["name"])
3030
iftype=="StartTag":
3131
open_elements.append(name)
3232
forname,valueintoken["data"]:
33-
ifnotisinstance(name,unicode):
33+
ifnotisinstance(name,str):
3434
raiseLintError(_("Attribute name is not a string: %r")%name)
3535
ifnotname:
36-
raiseLintError(_(u"Empty attribute name"))
37-
ifnotisinstance(value,unicode):
36+
raiseLintError(_("Empty attribute name"))
37+
ifnotisinstance(value,str):
3838
raiseLintError(_("Attribute value is not a string: %r")%value)
3939
ifnameincdataElements:
4040
contentModelFlag="CDATA"
@@ -45,15 +45,15 @@ def __iter__(self):
4545

4646
eliftype=="EndTag":
4747
name=token["name"]
48-
ifnotisinstance(name,unicode):
49-
raiseLintError(_(u"Tag name is not a string: %r")%name)
48+
ifnotisinstance(name,str):
49+
raiseLintError(_("Tag name is not a string: %r")%name)
5050
ifnotname:
51-
raiseLintError(_(u"Empty tag name"))
51+
raiseLintError(_("Empty tag name"))
5252
ifnameinvoidElements:
53-
raiseLintError(_(u"Void element reported as EndTag token: %s")%name)
53+
raiseLintError(_("Void element reported as EndTag token: %s")%name)
5454
start_name=open_elements.pop()
5555
ifstart_name!=name:
56-
raiseLintError(_(u"EndTag (%s) does not match StartTag (%s)")% (name,start_name))
56+
raiseLintError(_("EndTag (%s) does not match StartTag (%s)")% (name,start_name))
5757
contentModelFlag="PCDATA"
5858

5959
eliftype=="Comment":
@@ -62,27 +62,27 @@ def __iter__(self):
6262

6363
eliftypein ("Characters","SpaceCharacters"):
6464
data=token["data"]
65-
ifnotisinstance(data,unicode):
65+
ifnotisinstance(data,str):
6666
raiseLintError(_("Attribute name is not a string: %r")%data)
6767
ifnotdata:
68-
raiseLintError(_(u"%s token with empty data")%type)
68+
raiseLintError(_("%s token with empty data")%type)
6969
iftype=="SpaceCharacters":
7070
data=data.strip(spaceCharacters)
7171
ifdata:
72-
raiseLintError(_(u"Non-space character(s) found in SpaceCharacters token: ")%data)
72+
raiseLintError(_("Non-space character(s) found in SpaceCharacters token: ")%data)
7373

7474
eliftype=="Doctype":
7575
name=token["name"]
7676
ifcontentModelFlag!="PCDATA":
7777
raiseLintError(_("Doctype not in PCDATA content model flag: %s")%name)
78-
ifnotisinstance(name,unicode):
79-
raiseLintError(_(u"Tag name is not a string: %r")%name)
78+
ifnotisinstance(name,str):
79+
raiseLintError(_("Tag name is not a string: %r")%name)
8080
# XXX: what to do with token["data"] ?
8181

8282
eliftypein ("ParseError","SerializeError"):
8383
pass
8484

8585
else:
86-
raiseLintError(_(u"Unknown token type: %s")%type)
86+
raiseLintError(_("Unknown token type: %s")%type)
8787

8888
yieldtoken

‎src/html5lib/filters/optionaltags.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import_base
1+
from .import_base
22

33
classFilter(_base.Filter):
44
defslider(self):

‎src/html5lib/filters/sanitizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import_base
1+
from .import_base
22
fromhtml5lib.sanitizerimportHTMLSanitizerMixin
33

44
classFilter(_base.Filter,HTMLSanitizerMixin):

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp