Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Only support python versions that receive security updates #580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
Mic92 wants to merge 3 commits into html5lib:master
base:master
Choose a base branch
Loading
from Mic92:python2.7-removal
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions.appveyor.yml
View file
Open in desktop

This file was deleted.

3 changes: 0 additions & 3 deletions.github/workflows/python-tox.yml
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -12,9 +12,6 @@ jobs:
os: [ubuntu-latest, windows-latest]
deps: [base, optional]
include:
- python: "pypy-2.7"
os: ubuntu-latest
deps: base
- python: "pypy-3.10"
os: ubuntu-latest
deps: base
Expand Down
16 changes: 2 additions & 14 deletionsREADME.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -29,7 +29,7 @@ or:

By default, the ``document`` will be an ``xml.etree`` element instance.
Whenever possible, html5lib chooses the accelerated ``ElementTree``
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
implementation.

Two other tree types are supported: ``xml.dom.minidom`` and
``lxml.etree``. To use an alternative format, specify the name of
Expand All@@ -41,18 +41,6 @@ a treebuilder:
with open("mydocument.html", "rb") as f:
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")

When using with ``urllib2`` (Python 2), the charset from HTTP should be
passed into html5lib as follows:

.. code-block:: python

from contextlib import closing
from urllib2 import urlopen
import html5lib

with closing(urlopen("http://example.com/")) as f:
document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))

When using with ``urllib.request`` (Python 3), the charset from HTTP
should be passed into html5lib as follows:

Expand DownExpand Up@@ -90,7 +78,7 @@ More documentation is available at https://html5lib.readthedocs.io/.
Installation
------------

html5lib works on CPython2.7+, CPython 3.5+ and PyPy. To install:
html5lib works on CPython 3.8+ and PyPy. To install:

.. code-block:: bash

Expand Down
1 change: 0 additions & 1 deletiondebug-info.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importprint_function,unicode_literals

importplatform
importsys
Expand Down
3 changes: 1 addition & 2 deletionsdoc/conf.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# html5lib documentation build configuration file, created by
# sphinx-quickstart on Wed May 8 00:04:49 2013.
Expand DownExpand Up@@ -100,7 +99,7 @@
}


classCExtMock(object):
classCExtMock:
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
def__init__(self,*args,**kwargs):
pass
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -20,7 +20,6 @@
* :func:`~.serializer.serialize`
"""

from __future__importabsolute_import,division,unicode_literals

from .html5parserimportHTMLParser,parse,parseFragment
from .treebuildersimportgetTreeBuilder
Expand Down
3 changes: 1 addition & 2 deletionshtml5lib/_ihatexml.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

importre
importwarnings
Expand DownExpand Up@@ -181,7 +180,7 @@ def escapeRegexp(string):
nonPubidCharRegexp=re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")


classInfosetFilter(object):
classInfosetFilter:
replacementRegexp=re.compile(r"U[\dA-F]{5,5}")

def__init__(self,
Expand Down
9 changes: 4 additions & 5 deletionshtml5lib/_inputstream.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from six.moves import http_client, urllib
Expand DownExpand Up@@ -48,7 +47,7 @@
charsUntilRegEx = {}


class BufferedStream(object):
class BufferedStream:
"""Buffering for streams that do not have buffering of their own

The buffer is implemented as a list of chunks on the assumption that
Expand DownExpand Up@@ -145,7 +144,7 @@ def HTMLInputStream(source, **kwargs):
return HTMLBinaryInputStream(source, **kwargs)


class HTMLUnicodeInputStream(object):
class HTMLUnicodeInputStream:
"""Provides a unicode stream of characters to the HTMLTokenizer.

This class takes care of character encoding and removing or replacing
Expand DownExpand Up@@ -673,7 +672,7 @@ def jumpTo(self, bytes):
return True


class EncodingParser(object):
class EncodingParser:
"""Mini parser for detecting character encoding from meta elements"""

def __init__(self, data):
Expand DownExpand Up@@ -861,7 +860,7 @@ def getAttribute(self):
attrValue.append(c)


class ContentAttrParser(object):
class ContentAttrParser:
def __init__(self, data):
assert isinstance(data, bytes)
self.data = data
Expand Down
3 changes: 1 addition & 2 deletionshtml5lib/_tokenizer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr

Expand All@@ -24,7 +23,7 @@
attributeMap = OrderedDict


class HTMLTokenizer(object):
class HTMLTokenizer:
""" This class takes care of tokenizing HTML.

* self.currentToken
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/_trie/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

from .pyimportTrie

Expand Down
6 changes: 1 addition & 5 deletionshtml5lib/_trie/_base.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
from __future__ import absolute_import, division, unicode_literals

try:
from collections.abc import Mapping
except ImportError: # Python 2.7
from collections import Mapping
from collections.abc import Mapping


class Trie(Mapping):
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/_trie/py.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type

from bisect import bisect_left
Expand Down
10 changes: 3 additions & 7 deletionshtml5lib/_utils.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,15 @@
from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from collections.abc import Mapping

from six import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
else:
try:
import xml.etree.cElementTree as default_etree
import xml.etree.ElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree

Expand DownExpand Up@@ -122,7 +118,7 @@ def moduleFactoryFactory(factory):
moduleCache = {}

def moduleFactory(baseModule, *args, **kwargs):
if isinstance(ModuleType.__name__,type("")):
if isinstance(ModuleType.__name__,str):
name = "_%s_factory" % baseModule.__name__
else:
name = b"_%s_factory" % baseModule.__name__
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/constants.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

importstring

Expand Down
1 change: 0 additions & 1 deletionhtml5lib/filters/alphabeticalattributes.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

from .importbase

Expand Down
3 changes: 1 addition & 2 deletionshtml5lib/filters/base.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
from __future__ import absolute_import, division, unicode_literals


class Filter(object):
class Filter:
def __init__(self, source):
self.source = source

Expand Down
1 change: 0 additions & 1 deletionhtml5lib/filters/inject_meta_charset.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

from .importbase

Expand Down
1 change: 0 additions & 1 deletionhtml5lib/filters/lint.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

fromsiximporttext_type

Expand Down
1 change: 0 additions & 1 deletionhtml5lib/filters/optionaltags.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

from .importbase

Expand Down
1 change: 0 additions & 1 deletionhtml5lib/filters/sanitizer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -6,7 +6,6 @@
if Bleach is unsuitable for your needs.
"""
from __future__importabsolute_import,division,unicode_literals

importre
importwarnings
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/filters/whitespace.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

importre

Expand Down
9 changes: 4 additions & 5 deletionshtml5lib/html5parser.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals
from six import viewkeys

from . import _inputstream
Expand DownExpand Up@@ -69,7 +68,7 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen
return p.parseFragment(doc, container=container, **kwargs)


class HTMLParser(object):
class HTMLParser:
"""HTML parser

Generates a tree structure from a stream of (possibly malformed) HTML.
Expand DownExpand Up@@ -397,7 +396,7 @@ def parseRCDataRawtext(self, token, contentType):
self.phase = self.phases["text"]


class Phase(object):
class Phase:
"""Base class for helper object that implements each phase of processing
"""
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
Expand DownExpand Up@@ -428,7 +427,7 @@ def processSpaceCharacters(self, token):
def processStartTag(self, token):
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
# requires a circular reference to the Phase, and this ends up with a significant
# (CPython2.7,3.8) GC cost when parsing many short inputs
# (CPython 3.8) GC cost when parsing many short inputs
name = token["name"]
# In Py2, using `in` is quicker in general than try/except KeyError
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
Expand All@@ -455,7 +454,7 @@ def startTagHtml(self, token):
def processEndTag(self, token):
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
# requires a circular reference to the Phase, and this ends up with a significant
# (CPython2.7,3.8) GC cost when parsing many short inputs
# (CPython 3.8) GC cost when parsing many short inputs
name = token["name"]
# In Py2, using `in` is quicker in general than try/except KeyError
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
Expand Down
3 changes: 1 addition & 2 deletionshtml5lib/serializer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type

import re
Expand DownExpand Up@@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts):
return s.render(walker(input), encoding)


class HTMLSerializer(object):
class HTMLSerializer:

# attribute quoting options
quote_attr_values = "legacy" # be secure by default
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/tests/__init__.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
from __future__importabsolute_import,division,unicode_literals
5 changes: 2 additions & 3 deletionshtml5lib/tests/conftest.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import print_function
import os.path
import sys

Expand DownExpand Up@@ -54,7 +53,7 @@ def pytest_configure(config):
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
with open(req_file, "r") as fp:
with open(req_file) as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
Expand All@@ -79,7 +78,7 @@ def pytest_configure(config):
import xml.etree.ElementTree as ElementTree

try:
import xml.etree.cElementTree as cElementTree
import xml.etree.ElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/tests/sanitizer.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

importcodecs
importjson
Expand Down
3 changes: 1 addition & 2 deletionshtml5lib/tests/support.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

# pylint:disable=wrong-import-position

Expand DownExpand Up@@ -86,7 +85,7 @@ def __getitem__(self, key):
return dict.get(self, key, self.default)


class TestData(object):
class TestData:
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
if encoding is None:
self.f = open(filename, mode="rb")
Expand Down
1 change: 0 additions & 1 deletionhtml5lib/tests/test_alphabeticalattributes.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

fromcollectionsimportOrderedDict

Expand Down
5 changes: 2 additions & 3 deletionshtml5lib/tests/test_encoding.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
from __future__importabsolute_import,division,unicode_literals

importos

Expand All@@ -9,7 +8,7 @@


deftest_basic_prescan_length():
data="<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
data="<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode()
pad=1024-len(data)+1
data=data.replace(b"-a-",b"-"+ (b"a"*pad)+b"-")
assertlen(data)==1024# Sanity
Expand All@@ -18,7 +17,7 @@ def test_basic_prescan_length():


deftest_parser_reparse():
data="<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
data="<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode()
pad=10240-len(data)+1
data=data.replace(b"-a-",b"-"+ (b"a"*pad)+b"-")
assertlen(data)==10240# Sanity
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp