Jul 15, 2016 · Jul 15, 2016 · Jul 15, 2016 · Jul 15, 2016
diff --git a/CHANGES.rst b/CHANGES.rst
 0.999999999/1.0b10
 ~~~~~~~~~~~~~~~~~~

 Released on XXX
 Released on July 15, 2016

 * XXX
 * Fix attribute order going to the tree builder to be document order
  instead of reverse document order(!).


 0.99999999/1.0b9
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
           "getTreeWalker", "serialize"]

 # this has to be at the top level, see how setup.py parses this
 __version__ = "0.999999999-dev"
 __version__ = "0.9999999999-dev"
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
        """ HTML5 specific normalizations to the token stream """

        if token["type"] == tokenTypes["StartTag"]:
            token["data"] = OrderedDict(token['data'][::-1])
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                token["data"].update(raw[::-1])

        return token

diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
 from __future__ import absolute_import, division, unicode_literals

 from six import PY2, text_type
 from six import PY2, text_type, unichr

 import io

 from . import support  # noqa

 from html5lib.constants import namespaces
 from html5lib.constants import namespaces, tokenTypes
 from html5lib import parse, parseFragment, HTMLParser


    assert parse(io.StringIO("a")) is not None


 def test_maintain_attribute_order():
    """Start-tag attributes must come out of normalizeToken in document order.

    The token is fed straight to ``HTMLParser.normalizeToken`` because the
    attribute handling is implemented in the parser, not in the tokenizer.
    """
    # Use many distinct names so that any hash-ordered container along the
    # way would be very likely to reorder them.
    names = [unichr(code) for code in range(ord('a'), ord('z'))]
    attrs = list(zip(names, range(len(names))))
    start_tag = {
        'type': tokenTypes["StartTag"],
        'name': 'html',
        'data': attrs,
        'selfClosing': False,
        'selfClosingAcknowledged': False,
    }
    normalized = HTMLParser().normalizeToken(start_tag)
    assert list(normalized["data"].keys()) == names


 def test_duplicate_attribute():
    """When an attribute is repeated on a tag, the first value is kept.

    Lives with the parser tests because the handling is implemented in the
    parser, not in the tokenizer.
    """
    tree = parse('<p class=a class=b>')
    # Same lookup as tree[1][0]: first child of the root's second child.
    second_child = tree[1]
    paragraph = second_child[0]
    assert paragraph.get("class") == "a"


 def test_maintain_duplicate_attribute_order():
    """A repeated attribute name must not disturb the attribute order.

    The token is fed straight to ``HTMLParser.normalizeToken`` because the
    attribute handling is implemented in the parser, not in the tokenizer.
    """
    names = [unichr(code) for code in range(ord('a'), ord('z'))]
    attrs = list(zip(names, range(len(names))))
    # Repeat the first attribute name once more at the very end of the tag.
    with_duplicate = attrs + [('a', len(attrs))]
    start_tag = {
        'type': tokenTypes["StartTag"],
        'name': 'html',
        'data': with_duplicate,
        'selfClosing': False,
        'selfClosingAcknowledged': False,
    }
    normalized = HTMLParser().normalizeToken(start_tag)
    # Each name appears once, in the position of its first occurrence.
    assert list(normalized["data"].keys()) == names


 def test_debug_log():
    parser = HTMLParser(debug=True)
    parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,9 +4,10 @@ Change Log
		0.999999999/1.0b10
		~~~~~~~~~~~~~~~~~~

		Released on XXX
		Released on July 15, 2016

		* XXX
		* Fix attribute order going to the tree builder to be document order
		instead of reverse document order(!).


		0.99999999/1.0b9
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -22,4 +22,4 @@
		"getTreeWalker", "serialize"]

		# this has to be at the top level, see how setup.py parses this
		__version__ = "0.999999999-dev"
		__version__ = "0.9999999999-dev"
Original file line number	Diff line number	Diff line change
Expand Up		@@ -265,7 +265,11 @@ def normalizeToken(self, token):
		""" HTML5 specific normalizations to the token stream """

		if token["type"] == tokenTypes["StartTag"]:
		token["data"] = OrderedDict(token['data'][::-1])
		raw = token["data"]
		token["data"] = OrderedDict(raw)
		if len(raw) > len(token["data"]):
		# we had some duplicated attribute, fix so first wins
		token["data"].update(raw[::-1])

		return token

Expand Down
Original file line number	Diff line number	Diff line change
		@@ -1,12 +1,12 @@
		from __future__ import absolute_import, division, unicode_literals

		from six import PY2, text_type
		from six import PY2, text_type, unichr

		import io

		from . import support # noqa

		from html5lib.constants import namespaces
		from html5lib.constants import namespaces, tokenTypes
		from html5lib import parse, parseFragment, HTMLParser


Expand DownExpand Up		@@ -53,13 +53,42 @@ def test_unicode_file():
		assert parse(io.StringIO("a")) is not None


		def test_maintain_attribute_order():
		# This is here because we impl it in parser and not tokenizer
		p = HTMLParser()
		# generate loads to maximize the chance a hash-based mutation will occur
		attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
		token = {'name': 'html',
		'selfClosing': False,
		'selfClosingAcknowledged': False,
		'type': tokenTypes["StartTag"],
		'data': attrs}
		out = p.normalizeToken(token)
		attr_order = list(out["data"].keys())
		assert attr_order == [x for x, i in attrs]


		def test_duplicate_attribute():
		# This is here because we impl it in parser and not tokenizer
		doc = parse('<p class=a class=b>')
		el = doc[1][0]
		assert el.get("class") == "a"


		def test_maintain_duplicate_attribute_order():
		# This is here because we impl it in parser and not tokenizer
		p = HTMLParser()
		attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
		token = {'name': 'html',
		'selfClosing': False,
		'selfClosingAcknowledged': False,
		'type': tokenTypes["StartTag"],
		'data': attrs + [('a', len(attrs))]}
		out = p.normalizeToken(token)
		attr_order = list(out["data"].keys())
		assert attr_order == [x for x, i in attrs]


		def test_debug_log():
		parser = HTMLParser(debug=True)
		parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
Expand Down