Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
        uops = get_opnames(ex)
        self.assertIn("_POP_TOP_NOP", uops)

    def test_float_add_inplace_unique_lhs(self):
        # The product a * b is a uniquely referenced float, so the
        # following "+ c" is expected to reuse it in place (LHS variant).
        def testfunc(args):
            a, b, c, n = args
            total = 0.0
            for _ in range(n):
                total += a * b + c
            return total

        inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds 2.0 * 3.0 + 4.0 == 10.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * 10.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE", opnames)

    def test_float_add_inplace_unique_rhs(self):
        # a * b produces a unique float on the right side of +, so
        # "c + a * b" is expected to reuse that float in place.
        def testfunc(args):
            a, b, c, n = args
            total = 0.0
            for _ in range(n):
                total += c + a * b
            return total

        res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
        # Every iteration adds 4.0 + 2.0 * 3.0 == 10.0.
        self.assertAlmostEqual(res, TIER2_THRESHOLD * 10.0)
        self.assertIsNotNone(ex)
        uops = get_opnames(ex)
        self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
        # The accumulation "total += <unique float>" emits
        # _BINARY_OP_ADD_FLOAT_INPLACE_RIGHT by itself, so the check above
        # would pass even if "c + a * b" stayed a regular add.  Requiring
        # that no regular float add is left pins the inner add as well.
        self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops)

    def test_float_add_no_inplace_non_unique(self):
        # In "a + b" both operands are locals, hence neither one is
        # unique and that add stays a regular one.  Its result is a
        # unique RHS for "total += ...", which therefore uses the
        # _INPLACE_RIGHT variant.
        def testfunc(args):
            a, b, n = args
            total = 0.0
            for _ in range(n):
                total += a + b
            return total

        inputs = (2.0, 3.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds 2.0 + 3.0 == 5.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * 5.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        # Regular add for a + b (two locals).
        self.assertIn("_BINARY_OP_ADD_FLOAT", opnames)
        # The accumulation consumes the unique result on its right side.
        self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", opnames)
        # The LHS in-place variant must not show up for the first add.
        self.assertNotIn("_BINARY_OP_ADD_FLOAT_INPLACE", opnames)

    def test_float_subtract_inplace_unique_lhs(self):
        # The product a * b is a unique float; "- c" is expected to
        # reuse it in place (LHS variant).
        def testfunc(args):
            a, b, c, n = args
            total = 0.0
            for _ in range(n):
                total += a * b - c
            return total

        inputs = (2.0, 3.0, 1.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds 2.0 * 3.0 - 1.0 == 5.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * 5.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE", opnames)

    def test_float_subtract_inplace_unique_rhs(self):
        # The unique float a * b sits on the right of "-".  The value
        # computed is c - (a * b), so the numeric check below also
        # verifies that the sign comes out correctly.
        def testfunc(args):
            a, b, c, n = args
            total = 0.0
            for _ in range(n):
                total += c - a * b
            return total

        inputs = (2.0, 3.0, 1.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds 1.0 - 2.0 * 3.0 == -5.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * -5.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT", opnames)

    def test_float_multiply_inplace_unique_lhs(self):
        # The sum (a + b) is a unique float; "* c" is expected to reuse
        # it in place (LHS variant).
        def testfunc(args):
            a, b, c, n = args
            total = 0.0
            for _ in range(n):
                total += (a + b) * c
            return total

        inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds (2.0 + 3.0) * 4.0 == 20.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * 20.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", opnames)

    def test_float_multiply_inplace_unique_rhs(self):
        # The unique float (a + b) is the right operand of "*", so the
        # _INPLACE_RIGHT multiply variant is expected.
        def testfunc(args):
            a, b, c, n = args
            total = 0.0
            for _ in range(n):
                total += c * (a + b)
            return total

        inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds 4.0 * (2.0 + 3.0) == 20.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * 20.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT", opnames)

    def test_float_inplace_chain_propagation(self):
        # a * b + c * d: both products are unique, the + reuses one;
        # result of + is also unique for the subsequent +=
        def testfunc(args):
            a, b, c, d, n = args
            total = 0.0
            for _ in range(n):
                total += a * b + c * d
            return total

        res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
        # Every iteration adds 2.0 * 3.0 + 4.0 * 5.0 == 26.0.
        self.assertAlmostEqual(res, TIER2_THRESHOLD * 26.0)
        self.assertIsNotNone(ex)
        uops = get_opnames(ex)
        # The + between the two products should use an inplace variant
        inplace_add = (
            "_BINARY_OP_ADD_FLOAT_INPLACE" in uops
            or "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT" in uops
        )
        self.assertTrue(inplace_add,
            "Expected an inplace add for unique intermediate results")
        # The check above is also satisfied by the accumulation
        # "total += <unique float>" alone, which emits the _INPLACE_RIGHT
        # add.  Requiring that no regular float add is left ensures the
        # add between the two products was converted too, whichever
        # in-place variant the optimizer picked for it.
        self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops)

    def test_float_negate_inplace_unique(self):
        # In -(a * b) the product is a unique float, so the negation is
        # expected to be performed in place.
        def testfunc(args):
            a, b, n = args
            total = 0.0
            for _ in range(n):
                total += -(a * b)
            return total

        inputs = (2.0, 3.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds -(2.0 * 3.0) == -6.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * -6.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertIn("_UNARY_NEGATIVE_FLOAT_INPLACE", opnames)

    def test_float_negate_no_inplace_non_unique(self):
        # In "-a" the operand is a local, i.e. not unique, so the
        # in-place negation must not be used.
        def testfunc(args):
            a, n = args
            total = 0.0
            for _ in range(n):
                total += -a
            return total

        inputs = (2.0, TIER2_THRESHOLD)
        result, executor = self._run_with_optimizer(testfunc, inputs)
        # Every iteration adds -2.0.
        self.assertAlmostEqual(result, TIER2_THRESHOLD * -2.0)
        self.assertIsNotNone(executor)
        opnames = get_opnames(executor)
        self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", opnames)

    def test_load_attr_instance_value(self):
        def testfunc(n):
            class C():
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-22-12-00-00.gh-issue-146306.870ef4.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-22-12-00-00.gh-issue-146306.870ef4.rst
 Optimize float arithmetic in the JIT by mutating uniquely-referenced
 operands in place, avoiding allocation of a new float object. Speeds up
 the pyperformance ``nbody`` benchmark by ~19%.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3072,6 +3072,171 @@ def testfunc(args):
		uops = get_opnames(ex)
		self.assertIn("_POP_TOP_NOP", uops)

		def test_float_add_inplace_unique_lhs(self):
		    # The product a * b is a uniquely referenced float, so the
		    # following "+ c" is expected to reuse it in place (LHS variant).
		    def testfunc(args):
		        a, b, c, n = args
		        total = 0.0
		        for _ in range(n):
		            total += a * b + c
		        return total

		    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds 2.0 * 3.0 + 4.0 == 10.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * 10.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE", opnames)

		def test_float_add_inplace_unique_rhs(self):
		    # a * b produces a unique float on the right side of +, so
		    # "c + a * b" is expected to reuse that float in place.
		    def testfunc(args):
		        a, b, c, n = args
		        total = 0.0
		        for _ in range(n):
		            total += c + a * b
		        return total

		    res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
		    # Every iteration adds 4.0 + 2.0 * 3.0 == 10.0.
		    self.assertAlmostEqual(res, TIER2_THRESHOLD * 10.0)
		    self.assertIsNotNone(ex)
		    uops = get_opnames(ex)
		    self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
		    # The accumulation "total += <unique float>" emits
		    # _BINARY_OP_ADD_FLOAT_INPLACE_RIGHT by itself, so the check above
		    # would pass even if "c + a * b" stayed a regular add.  Requiring
		    # that no regular float add is left pins the inner add as well.
		    self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops)

		def test_float_add_no_inplace_non_unique(self):
		    # In "a + b" both operands are locals, hence neither one is
		    # unique and that add stays a regular one.  Its result is a
		    # unique RHS for "total += ...", which therefore uses the
		    # _INPLACE_RIGHT variant.
		    def testfunc(args):
		        a, b, n = args
		        total = 0.0
		        for _ in range(n):
		            total += a + b
		        return total

		    inputs = (2.0, 3.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds 2.0 + 3.0 == 5.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * 5.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    # Regular add for a + b (two locals).
		    self.assertIn("_BINARY_OP_ADD_FLOAT", opnames)
		    # The accumulation consumes the unique result on its right side.
		    self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", opnames)
		    # The LHS in-place variant must not show up for the first add.
		    self.assertNotIn("_BINARY_OP_ADD_FLOAT_INPLACE", opnames)

		def test_float_subtract_inplace_unique_lhs(self):
		    # The product a * b is a unique float; "- c" is expected to
		    # reuse it in place (LHS variant).
		    def testfunc(args):
		        a, b, c, n = args
		        total = 0.0
		        for _ in range(n):
		            total += a * b - c
		        return total

		    inputs = (2.0, 3.0, 1.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds 2.0 * 3.0 - 1.0 == 5.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * 5.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE", opnames)

		def test_float_subtract_inplace_unique_rhs(self):
		    # The unique float a * b sits on the right of "-".  The value
		    # computed is c - (a * b), so the numeric check below also
		    # verifies that the sign comes out correctly.
		    def testfunc(args):
		        a, b, c, n = args
		        total = 0.0
		        for _ in range(n):
		            total += c - a * b
		        return total

		    inputs = (2.0, 3.0, 1.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds 1.0 - 2.0 * 3.0 == -5.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * -5.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT", opnames)

		def test_float_multiply_inplace_unique_lhs(self):
		    # The sum (a + b) is a unique float; "* c" is expected to reuse
		    # it in place (LHS variant).
		    def testfunc(args):
		        a, b, c, n = args
		        total = 0.0
		        for _ in range(n):
		            total += (a + b) * c
		        return total

		    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds (2.0 + 3.0) * 4.0 == 20.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * 20.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", opnames)

		def test_float_multiply_inplace_unique_rhs(self):
		    # The unique float (a + b) is the right operand of "*", so the
		    # _INPLACE_RIGHT multiply variant is expected.
		    def testfunc(args):
		        a, b, c, n = args
		        total = 0.0
		        for _ in range(n):
		            total += c * (a + b)
		        return total

		    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds 4.0 * (2.0 + 3.0) == 20.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * 20.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT", opnames)

		def test_float_inplace_chain_propagation(self):
		    # a * b + c * d: both products are unique, the + reuses one;
		    # result of + is also unique for the subsequent +=
		    def testfunc(args):
		        a, b, c, d, n = args
		        total = 0.0
		        for _ in range(n):
		            total += a * b + c * d
		        return total

		    res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
		    # Every iteration adds 2.0 * 3.0 + 4.0 * 5.0 == 26.0.
		    self.assertAlmostEqual(res, TIER2_THRESHOLD * 26.0)
		    self.assertIsNotNone(ex)
		    uops = get_opnames(ex)
		    # The + between the two products should use an inplace variant
		    inplace_add = (
		        "_BINARY_OP_ADD_FLOAT_INPLACE" in uops
		        or "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT" in uops
		    )
		    self.assertTrue(inplace_add,
		        "Expected an inplace add for unique intermediate results")
		    # The check above is also satisfied by the accumulation
		    # "total += <unique float>" alone, which emits the _INPLACE_RIGHT
		    # add.  Requiring that no regular float add is left ensures the
		    # add between the two products was converted too, whichever
		    # in-place variant the optimizer picked for it.
		    self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops)

		def test_float_negate_inplace_unique(self):
		    # In -(a * b) the product is a unique float, so the negation is
		    # expected to be performed in place.
		    def testfunc(args):
		        a, b, n = args
		        total = 0.0
		        for _ in range(n):
		            total += -(a * b)
		        return total

		    inputs = (2.0, 3.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds -(2.0 * 3.0) == -6.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * -6.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertIn("_UNARY_NEGATIVE_FLOAT_INPLACE", opnames)

		def test_float_negate_no_inplace_non_unique(self):
		    # In "-a" the operand is a local, i.e. not unique, so the
		    # in-place negation must not be used.
		    def testfunc(args):
		        a, n = args
		        total = 0.0
		        for _ in range(n):
		            total += -a
		        return total

		    inputs = (2.0, TIER2_THRESHOLD)
		    result, executor = self._run_with_optimizer(testfunc, inputs)
		    # Every iteration adds -2.0.
		    self.assertAlmostEqual(result, TIER2_THRESHOLD * -2.0)
		    self.assertIsNotNone(executor)
		    opnames = get_opnames(executor)
		    self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", opnames)

		def test_load_attr_instance_value(self):
		def testfunc(n):
		class C():
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Optimize float arithmetic in the JIT by mutating uniquely-referenced
		operands in place, avoiding allocation of a new float object. Speeds up
		the pyperformance ``nbody`` benchmark by ~19%.