Oct 4, 2021 · Sep 28, 2021 · Sep 28, 2021 · Sep 28, 2021 · Sep 28, 2021 · Sep 29, 2021
diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py
 ESP32 ULP Co-Processor Assembler
 """

 import re
 from . import opcodes
 from .nocomment import remove_comments as do_remove_comments
 from .util import garbage_collect
        self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
        opcodes.symbols = self.symbols  # XXX dirty hack

        # regex for parsing assembly lines
        # format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
        # where [] means optional
        # initialised here once, instead of compiling once per line
        self.line_regex = re.compile(r'^(\s*([a-zA-Z0-9_$.]+):)?\s*((\S*)\s*(.*))$')

    def init(self, a_pass):
        self.a_pass = a_pass
        self.sections = dict(text=[], data=[])
        """
        if not line:
            return
        has_label = line[0] not in '\t .'
        if has_label:
            label_line = line.split(None, 1)
            if len(label_line) == 2:
                label, line = label_line
            else:  # 1
                label, line = label_line[0], None
            label = label.rstrip(':')
        else:
            label, line = None, line.lstrip()
        if line is None:
            opcode, args = None, ()
        else:
            opcode_args = line.split(None, 1)
            if len(opcode_args) == 2:
                opcode, args = opcode_args
                args = tuple(arg.strip() for arg in args.split(','))
            else:  # 1
                opcode, args = opcode_args[0], ()

        matches = self.line_regex.match(line)
        label, opcode, args = matches.group(2), matches.group(4), matches.group(5)

        label = label if label else None  # force empty strings to None
        opcode = opcode if opcode else None  # force empty strings to None
        args = tuple(arg.strip() for arg in args.split(',')) if args else ()

        return label, opcode, args

    def split_statements(self, lines):
diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
    if arg.type == IMM:
        if arg.value & 3 != 0:  # bitwise version of: arg.value % 4 != 0
            raise ValueError('Relative offset must be a multiple of 4')
        return arg.value >> 2  # bitwise version of: arg.value // 4
        returnIMM,arg.value >> 2  # bitwise version of: arg.value // 4
    if arg.type == SYM:
        return symbols.resolve_relative(arg.value)
        returnSYM,symbols.resolve_relative(arg.value)
    raise TypeError('wanted: immediate, got: %s' % arg.raw)


    return _tsens.all


 def i_adc(reg_dest, adc_idx, mux):
 def i_adc(reg_dest, adc_idx, mux, _not_used=None):
    _adc.dreg = get_reg(reg_dest)
    _adc.mux = get_imm(mux)
    _adc.sar_sel = get_imm(adc_idx)
        raise ValueError("invalid flags condition")
    if target.type == IMM or target.type == SYM:
        _bx.dreg = 0
        _bx.addr = get_abs(target)
        # we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.
        _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2  # bitwise version of "// 4"
        _bx.unused = 0
        _bx.reg = 0
        _bx.type = jump_type


 def i_jumpr(offset, threshold, condition):
    offset = get_rel(offset)
 offset_type,offset = get_rel(offset)
    threshold = get_imm(threshold)
    condition = get_cond(condition)
    if condition == 'lt':
        # jump over next JUMPR
        skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
        # jump to target
        offset -= 1  # adjust for the additional JUMPR instruction
        if (offset_type == IMM and offset < 0) or offset_type == SYM:
            # adjust for the additional JUMPR instruction
            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
            offset -= 1
        jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
        return (skip_ins, jump_ins)
    else:


 def i_jumps(offset, threshold, condition):
    offset = get_rel(offset)
 offset_type,offset = get_rel(offset)
    threshold = get_imm(threshold)
    condition = get_cond(condition)
    if condition == 'lt':
        # jump over next JUMPS
        skip_ins = _jump_rels(threshold, skip_cond, 2)
        # jump to target
        offset -= 1  # adjust for the additional JUMPS instruction
        if (offset_type == IMM and offset < 0) or offset_type == SYM:
            # adjust for the additional JUMPS instruction
            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
            offset -= 1
        jump_ins = _jump_rels(threshold, jump_cond, offset)

        return (skip_ins, jump_ins)
diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh
    test_name="${src_name##*/}"

    # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix)
    for I inesp32ulp_all esp32ulp_globalsesp32ulp_jumpr esp32ulp_ranges test_reg; do
    for I in esp32ulp_jumpr esp32ulp_ranges; do
        if [ "${test_name}" = "$I" ]; then
            # these are old bugs, and not related to the RTC macro handling functionality
            # they will still be great to fix over time
diff --git a/tests/assemble.py b/tests/assemble.py
    assert a.parse_line(next(lines)) == (None, '.data', ())  # test left-aligned directive is not treated as label


 def test_parse_labels_correctly():
    """
    description of what defines a label
    https://sourceware.org/binutils/docs/as/Statements.html
    https://sourceware.org/binutils/docs/as/Labels.html
    """
    a = Assembler()
    assert a.parse_line('') is None
    assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
    assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
    assert a.parse_line('label:') == ('label', None, ())
    assert a.parse_line('    label:') == ('label', None, ())
    assert a.parse_line('    label:  ') == ('label', None, ())
    assert a.parse_line('nop  ') == (None, 'nop', ())
    assert a.parse_line('.set c, 1  ') == (None, '.set', ('c', '1',))
    assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',))  # no whitespace between label and colon
    assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',))
    assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',))  # colon in string
    assert a.parse_line('label::') == ('label', ':', ())
    assert a.parse_line('label: :') == ('label', ':', ())
    assert a.parse_line('a_label:') == ('a_label', None, ())
    assert a.parse_line('$label:') == ('$label', None, ())
    assert a.parse_line('.label:') == ('.label', None, ())
    assert a.parse_line('&label:') == (None, '&label:', ())  # & not a valid char in a label


 def test_parse():
    a = Assembler()
    lines = remove_comments(src)


 test_parse_line()
 test_parse_labels_correctly()
 test_parse()
 test_assemble()
 test_assemble_bss()
diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S
  # interpret ; as statement separator - this results in 2 NOP machine instructions
  nop; nop;

  # adc supports an undocumented 4th argument, which should be entirely ignored
  # binutils-esp32ulp also ignores this argument, if present, see:
  # https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/config/esp32ulp-parse.y#L810
  adc r1, 0, 1, 100

  halt
diff --git a/tests/compat/jumps.S b/tests/compat/jumps.S
 entry:
  nop

  # simple jumps
  jump entry
  jump later
  jump 0x120, EQ
  jump -288, EQ

  # jumps with labels
  jumps entry, 42, lt
  jumps entry, 42, lt

  # jumps with immediate offset (specified in bytes, but real instruction uses words)
  jumps 0, 42, lt
  jumps 0, 42, eq  # dual-instruction condition

  jumps 4, 42, lt
  jumps 4, 42, eq  # dual-instruction condition
  jumps 8, 42, lt
  jumps 32, 42, lt

  jumps -4, 42, lt
  jumps -4, 42, eq  # dual-instruction condition
  jumps -8, 42, lt
  jumps -32, 42, lt


  # jumpr with immediate offset (specified in bytes, but real instruction uses words)
  jumpr 0, 42, lt
  jumpr 0, 42, eq  # dual-instruction condition

  jumpr 4, 42, lt
  jumpr 4, 42, eq  # dual-instruction condition
  jumpr 8, 42, lt
  jumpr 32, 42, lt

  jumpr -4, 42, lt
  jumpr -4, 42, eq  # dual-instruction condition
  jumpr -8, 42, lt
  jumpr -32, 42, lt
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,6 +2,7 @@
		ESP32 ULP Co-Processor Assembler
		"""

		import re
		from . import opcodes
		from .nocomment import remove_comments as do_remove_comments
		from .util import garbage_collect
Expand DownExpand Up		@@ -91,6 +92,12 @@ def __init__(self, symbols=None, bases=None, globals=None):
		self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
		opcodes.symbols = self.symbols # XXX dirty hack

		# regex for parsing assembly lines
		# format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
		# where [] means optional
		# initialised here once, instead of compiling once per line
		self.line_regex = re.compile(r'^(\s([a-zA-Z0-9_$.]+):)?\s((\S)\s(.*))$')

		def init(self, a_pass):
		self.a_pass = a_pass
		self.sections = dict(text=[], data=[])
Expand All		@@ -108,25 +115,14 @@ def parse_line(self, line):
		"""
		if not line:
		return
		has_label = line[0] not in '\t .'
		if has_label:
		label_line = line.split(None, 1)
		if len(label_line) == 2:
		label, line = label_line
		else: # 1
		label, line = label_line[0], None
		label = label.rstrip(':')
		else:
		label, line = None, line.lstrip()
		if line is None:
		opcode, args = None, ()
		else:
		opcode_args = line.split(None, 1)
		if len(opcode_args) == 2:
		opcode, args = opcode_args
		args = tuple(arg.strip() for arg in args.split(','))
		else: # 1
		opcode, args = opcode_args[0], ()

		matches = self.line_regex.match(line)
		label, opcode, args = matches.group(2), matches.group(4), matches.group(5)

		label = label if label else None # force empty strings to None
		opcode = opcode if opcode else None # force empty strings to None
		args = tuple(arg.strip() for arg in args.split(',')) if args else ()

		return label, opcode, args

		def split_statements(self, lines):
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -342,9 +342,9 @@ def get_rel(arg):
		if arg.type == IMM:
		if arg.value & 3 != 0: # bitwise version of: arg.value % 4 != 0
		raise ValueError('Relative offset must be a multiple of 4')
		return arg.value >> 2 # bitwise version of: arg.value // 4
		returnIMM,arg.value >> 2 # bitwise version of: arg.value // 4
		if arg.type == SYM:
		return symbols.resolve_relative(arg.value)
		returnSYM,symbols.resolve_relative(arg.value)
		raise TypeError('wanted: immediate, got: %s' % arg.raw)


Expand DownExpand Up		@@ -449,7 +449,7 @@ def i_tsens(reg_dest, delay):
		return _tsens.all


		def i_adc(reg_dest, adc_idx, mux):
		def i_adc(reg_dest, adc_idx, mux, _not_used=None):
		_adc.dreg = get_reg(reg_dest)
		_adc.mux = get_imm(mux)
		_adc.sar_sel = get_imm(adc_idx)
Expand DownExpand Up		@@ -619,7 +619,8 @@ def i_jump(target, condition='--'):
		raise ValueError("invalid flags condition")
		if target.type == IMM or target.type == SYM:
		_bx.dreg = 0
		_bx.addr = get_abs(target)
		# we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.
		_bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2 # bitwise version of "// 4"
Copy link Collaborator ThomasWaldmannSep 29, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I don't get this. If it is a symbol, it directly uses its value (absolute address) else it uses an immediate offset // 4? Considering everything else in the opcode is the same, how does the cpu know what to do? like whether it is an offset or an absolute address? Copy link CollaboratorAuthor wnienhausOct 3, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. In both cases what the machine instruction gets is an absolute address (not relative offset). The absolute address is either the address of a label, or an absolute address specified as immediate value. What is different in the two cases is that the assembler instruction must take an address given in bytes, while the machine instruction needs to contain the address in words. Because in py-esp32-ulp we already track label addresses in words internally, we do not need to divide those symbol addresses by 4. But for immediate values we need to (see:https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-guides/ulp_instruction_set.html#note-about-addressing). Does that make sense, or did I miss what you asked? However... I did just discover one more case for the JUMP instruction, where we don't properly divide by 4. So this current logic is not 100% perfect. It appears to be that it's not only IMM values that must be divided by 4, but also symbol values that we internally label with the ABS type (as opposed to REL). Those ABS type symbols are defined by`.set`. So maybe the logic here should really be: Is what was given to us a relocatable thing (label) or not. If yes, use as is, if not divide by 4. This logic is starting to look valid to me, the more I think about it. It should give the same result for the cases currently handled correctly, but also for the case of symbols defined with`.set`. It would requires a bit of a bigger change though, because of expression evaluation, which potentially uses multiple symbols that could have different types. So I will try and see how binutils-esp32ulp handles mixing symbols types, and likely a valid solution could be to consider the result of an expression to be a REL type if any symbols in the expression were of that type, otherwise consider the result to be an ABS type. And then use ABS and REL here rather than SYM and IMM to decide whether to divide by 4. Copy link Collaborator ThomasWaldmannOct 3, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. thanks for explaining, please add a comment above this like: `# we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.` Copy link Collaborator ThomasWaldmannOct 3, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Maybe we could also have ABS1 (byte addresses), ABS4 (32bit word addrs), etc. so we better know what we have. Copy link CollaboratorAuthor wnienhausOct 3, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. So, my last part about the REL vs ABS, I think is not relevant anymore. I believe that "one more case" I talked about above, is actually a bug in binutils-esp32ulp I have now tested quite a bit more and stepped through binutils-esp32ulp a bit and found the following: whether the value (that was defined with`.set`) gets divided by 4 depends on whether the symbol is marked as "global" or not. If marked as global, then the value will be taken as-is, when not marked as global, the value will be divided by 4. I don't see how this could be intended behaviour. The reason I started to get skeptical is that binutils-esp32ulp behaves correctly for JUMPS and JUMPR cases, as per description in the documentation:https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-guides/ulp_instruction_set.html#note-about-addressing. The documentation shows an example, where values, which come from`.set`'ed symbols, should not be converted from bytes to words. So the fact that the JUMP instruction then tried to divide the value of the symbol by 4 (in the case I tried to fix) seems inconsistent. I think the issues comes up in binutils-esp32ulp, depending on where the value is actually "filled-in" into the instruction. For non-global symbols, the value is already set during the assembly stage (`as`), while in the global case, the symbol value is only "filled in" during the linking stage (`ld`) - it's 0 in the instruction before that stage. The implementation of this logic in the assembler and linker differ. So that means, I would not change anything in our implementation (other than adding the comment you suggested). I might file a bug report to the binutils-esp32ulp project. Copy link Collaborator ThomasWaldmannOct 4, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Yeah, sounds strange, please file a bug. Copy link CollaboratorAuthor wnienhausOct 10, 2021 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I filed a bug report now:espressif/binutils-esp32ulp#22 ThomasWaldmann reacted with thumbs up emoji
		_bx.unused = 0
		_bx.reg = 0
		_bx.type = jump_type
Expand DownExpand Up		@@ -652,7 +653,7 @@ def _jump_relr(threshold, cond, offset):


		def i_jumpr(offset, threshold, condition):
		offset = get_rel(offset)
		offset_type,offset = get_rel(offset)
		threshold = get_imm(threshold)
		condition = get_cond(condition)
		if condition == 'lt':
Expand All		@@ -669,7 +670,11 @@ def i_jumpr(offset, threshold, condition):
		# jump over next JUMPR
		skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
		# jump to target
		offset -= 1 # adjust for the additional JUMPR instruction
		if (offset_type == IMM and offset < 0) or offset_type == SYM:
		# adjust for the additional JUMPR instruction
		# for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
		# for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
		offset -= 1
		jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
		return (skip_ins, jump_ins)
		else:
Expand All		@@ -691,7 +696,7 @@ def _jump_rels(threshold, cond, offset):


		def i_jumps(offset, threshold, condition):
		offset = get_rel(offset)
		offset_type,offset = get_rel(offset)
		threshold = get_imm(threshold)
		condition = get_cond(condition)
		if condition == 'lt':
Expand All		@@ -711,7 +716,11 @@ def i_jumps(offset, threshold, condition):
		# jump over next JUMPS
		skip_ins = _jump_rels(threshold, skip_cond, 2)
		# jump to target
		offset -= 1 # adjust for the additional JUMPS instruction
		if (offset_type == IMM and offset < 0) or offset_type == SYM:
		# adjust for the additional JUMPS instruction
		# for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
		# for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
		offset -= 1
		jump_ins = _jump_rels(threshold, jump_cond, offset)

		return (skip_ins, jump_ins)
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -66,7 +66,7 @@ for src_file in ulptool/src/ulp_examples//.s binutils-esp32ulp/gas/testsuite/g
		test_name="${src_name##*/}"

		# for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix)
		for I inesp32ulp_all esp32ulp_globalsesp32ulp_jumpr esp32ulp_ranges test_reg; do
		for I in esp32ulp_jumpr esp32ulp_ranges; do
		if [ "${test_name}" = "$I" ]; then
		# these are old bugs, and not related to the RTC macro handling functionality
		# they will still be great to fix over time
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -53,6 +53,32 @@ def test_parse_line():
		assert a.parse_line(next(lines)) == (None, '.data', ()) # test left-aligned directive is not treated as label


		def test_parse_labels_correctly():
		"""
		description of what defines a label
		https://sourceware.org/binutils/docs/as/Statements.html
		https://sourceware.org/binutils/docs/as/Labels.html
		"""
		a = Assembler()
		assert a.parse_line('') is None
		assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
		assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
		assert a.parse_line('label:') == ('label', None, ())
		assert a.parse_line(' label:') == ('label', None, ())
		assert a.parse_line(' label: ') == ('label', None, ())
ThomasWaldmann marked this conversation as resolved. Show resolvedHide resolved
		assert a.parse_line('nop ') == (None, 'nop', ())
		assert a.parse_line('.set c, 1 ') == (None, '.set', ('c', '1',))
		assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',)) # no whitespace between label and colon
		assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',))
		assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',)) # colon in string
		assert a.parse_line('label::') == ('label', ':', ())
		assert a.parse_line('label: :') == ('label', ':', ())
		assert a.parse_line('a_label:') == ('a_label', None, ())
		assert a.parse_line('$label:') == ('$label', None, ())
		assert a.parse_line('.label:') == ('.label', None, ())
		assert a.parse_line('&label:') == (None, '&label:', ()) # & not a valid char in a label


		def test_parse():
		a = Assembler()
		lines = remove_comments(src)
Expand DownExpand Up		@@ -260,6 +286,7 @@ def test_support_multiple_statements_per_line():


		test_parse_line()
		test_parse_labels_correctly()
		test_parse()
		test_assemble()
		test_assemble_bss()
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -28,4 +28,9 @@ entry:
		# interpret ; as statement separator - this results in 2 NOP machine instructions
		nop; nop;

		# adc supports an undocumented 4th argument, which should be entirely ignored
		# binutils-esp32ulp also ignores this argument, if present, see:
		# https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/config/esp32ulp-parse.y#L810
		adc r1, 0, 1, 100

		halt
Original file line number	Diff line number	Diff line change
Expand Up		@@ -5,6 +5,12 @@
		entry:
		nop

		# simple jumps
		jump entry
		jump later
		jump 0x120, EQ
		jump -288, EQ

		# jumps with labels
		jumps entry, 42, lt
		jumps entry, 42, lt
Expand All		@@ -20,12 +26,15 @@ entry:

		# jumps with immediate offset (specified in bytes, but real instruction uses words)
		jumps 0, 42, lt
		jumps 0, 42, eq # dual-instruction condition

		jumps 4, 42, lt
		jumps 4, 42, eq # dual-instruction condition
		jumps 8, 42, lt
		jumps 32, 42, lt

		jumps -4, 42, lt
		jumps -4, 42, eq # dual-instruction condition
		jumps -8, 42, lt
		jumps -32, 42, lt

Expand All		@@ -46,12 +55,15 @@ entry:

		# jumpr with immediate offset (specified in bytes, but real instruction uses words)
		jumpr 0, 42, lt
		jumpr 0, 42, eq # dual-instruction condition

		jumpr 4, 42, lt
		jumpr 4, 42, eq # dual-instruction condition
		jumpr 8, 42, lt
		jumpr 32, 42, lt

		jumpr -4, 42, lt
		jumpr -4, 42, eq # dual-instruction condition
		jumpr -8, 42, lt
		jumpr -32, 42, lt

Expand Down