@@ -46,6 +46,7 @@ struct collection_state {
     GCState *gcstate;
     Py_ssize_t collected;
     Py_ssize_t uncollectable;
+    Py_ssize_t long_lived_total;
     struct worklist unreachable;
     struct worklist legacy_finalizers;
     struct worklist wrcb_to_call;
@@ -443,7 +444,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
     else {
         // object is reachable, restore `ob_tid`; we're done with these objects
         gc_restore_tid(op);
-        state->gcstate->long_lived_total++;
+        state->long_lived_total++;
     }
 
     return true;
@@ -605,6 +606,8 @@ get_gc_state(void)
 void
 _PyGC_InitState(GCState *gcstate)
 {
+    // TODO: move to pycore_runtime_init.h once the incremental GC lands.
+    gcstate->generations[0].threshold = 2000;
 }
@@ -885,62 +888,6 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
     assert(!_PyErr_Occurred(tstate));
 }
 
-/* Find the oldest generation (highest numbered) where the count
- * exceeds the threshold.  Objects in the that generation and
- * generations younger than it will be collected. */
-static int
-gc_select_generation(GCState *gcstate)
-{
-    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
-            /* Avoid quadratic performance degradation in number of tracked
-             * objects (see also issue #4074):
-             *
-             * To limit the cost of garbage collection, there are two strategies;
-             *   - make each collection faster, e.g. by scanning fewer objects
-             *   - do less collections
-             * This heuristic is about the latter strategy.
-             *
-             * In addition to the various configurable thresholds, we only trigger
-             * a full collection if the ratio
-             *
-             *    long_lived_pending / long_lived_total
-             *
-             * is above a given value (hardwired to 25%).
-             *
-             * The reason is that, while "non-full" collections (i.e., collections
-             * of the young and middle generations) will always examine roughly
-             * the same number of objects -- determined by the aforementioned
-             * thresholds --, the cost of a full collection is proportional to the
-             * total number of long-lived objects, which is virtually unbounded.
-             *
-             * Indeed, it has been remarked that doing a full collection every
-             * <constant number> of object creations entails a dramatic
-             * performance degradation in workloads which consist in creating
-             * and storing lots of long-lived objects (e.g. building a large list
-             * of GC-tracked objects would show quadratic performance, instead
-             * of linear as expected: see issue #4074).
-             *
-             * Using the above ratio, instead, yields amortized linear performance
-             * in the total number of objects (the effect of which can be
-             * summarized thusly: "each full garbage collection is more and more
-             * costly as the number of objects grows, but we do fewer and fewer
-             * of them").
-             *
-             * This heuristic was suggested by Martin von Löwis on python-dev in
-             * June 2008. His original analysis and proposal can be found at:
-             * http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-             */
-            if (i == NUM_GENERATIONS - 1
-                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
-            {
-                continue;
-            }
-            return i;
-        }
-    }
-    return -1;
-}
-
 static void
 cleanup_worklist(struct worklist *worklist)
 {
@@ -952,6 +899,21 @@ cleanup_worklist(struct worklist *worklist)
     }
 }
 
+static bool
+gc_should_collect(GCState *gcstate)
+{
+    int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count);
+    int threshold = gcstate->generations[0].threshold;
+    if (count <= threshold || threshold == 0 || !gcstate->enabled) {
+        return false;
+    }
+    // Avoid quadratic behavior by scaling threshold to the number of live
+    // objects. A few tests rely on immediate scheduling of the GC so we ignore
+    // the scaled threshold if generations[1].threshold is set to zero.
+    return (count > gcstate->long_lived_total / 4
+            || gcstate->generations[1].threshold == 0);
+}
+
 static void
 gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
 {
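For context, here is a minimal standalone sketch of the scheduling rule this hunk introduces: collect only once the young count exceeds both the fixed threshold and 25% of the long-lived total. The `toy_gcstate` struct, its field values, and the `main` driver are hypothetical stand-ins (no real `GCState`, no atomics); the point is to show why the scaled threshold avoids quadratic behavior when many objects survive.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the GCState fields used by gc_should_collect. */
struct toy_gcstate {
    int young_count;            /* generations[0].count */
    int young_threshold;        /* generations[0].threshold (2000 by default) */
    int middle_threshold;       /* generations[1].threshold; 0 disables scaling */
    long long long_lived_total; /* survivors of previous collections */
    bool enabled;
};

/* Same decision rule as the gc_should_collect hunk above, minus the atomics. */
static bool
toy_should_collect(const struct toy_gcstate *st)
{
    if (st->young_count <= st->young_threshold
        || st->young_threshold == 0
        || !st->enabled) {
        return false;
    }
    /* Scale with the live heap: also require the young count to exceed
     * 25% of the long-lived total, unless scaling is disabled. */
    return (st->young_count > st->long_lived_total / 4
            || st->middle_threshold == 0);
}

int
main(void)
{
    struct toy_gcstate st = { .young_threshold = 2000,
                              .middle_threshold = 10,
                              .enabled = true };
    int collections = 0;
    /* Link one million objects that all survive collection. */
    for (int i = 0; i < 1000000; i++) {
        st.young_count++;
        if (toy_should_collect(&st)) {
            collections++;
            st.long_lived_total += st.young_count; /* promote survivors */
            st.young_count = 0;
        }
    }
    /* Prints a small number (a few dozen): once the long-lived total passes
     * 4x the fixed threshold, the trigger point grows geometrically, so
     * collections get rarer as the live heap grows -- amortized linear cost. */
    printf("collections: %d\n", collections);
    return 0;
}
```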
@@ -1029,15 +991,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
         return 0;
     }
 
-    if (generation == GENERATION_AUTO) {
-        // Select the oldest generation that needs collecting. We will collect
-        // objects from that generation and all generations younger than it.
-        generation = gc_select_generation(gcstate);
-        if (generation < 0) {
-            // No generation needs to be collected.
-            _Py_atomic_store_int(&gcstate->collecting, 0);
-            return 0;
-        }
+    if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) {
+        // Don't collect if the threshold is not exceeded.
+        _Py_atomic_store_int(&gcstate->collecting, 0);
+        return 0;
     }
 
     assert(generation >= 0 && generation < NUM_GENERATIONS);
@@ -1082,6 +1039,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     m = state.collected;
     n = state.uncollectable;
+    gcstate->long_lived_total = state.long_lived_total;
 
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
         double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1523,12 +1481,10 @@ _PyObject_GC_Link(PyObject *op)
 {
     PyThreadState *tstate = _PyThreadState_GET();
     GCState *gcstate = &tstate->interp->gc;
-    gcstate->generations[0].count++; /* number of allocated GC objects */
-    if (gcstate->generations[0].count >
-        gcstate->generations[0].threshold &&
-        gcstate->enabled &&
-        gcstate->generations[0].threshold &&
-        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
-        !_PyErr_Occurred(tstate))
+    gcstate->generations[0].count++;
+
+    if (gc_should_collect(gcstate) &&
+        !_Py_atomic_load_int_relaxed(&gcstate->collecting))
     {
         _Py_ScheduleGC(tstate->interp);
     }
@@ -1537,7 +1493,7 @@ _PyObject_GC_Link(PyObject *op)
 void
 _Py_RunGC(PyThreadState *tstate)
 {
-    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
+    gc_collect_main(tstate, 0, _Py_GC_REASON_HEAP);
 }
 
 static PyObject *
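Taken together, these hunks replace the per-generation threshold scan with a single scaled check: `_PyObject_GC_Link` only bumps `generations[0].count` and defers the decision to `gc_should_collect`, which also gates `gc_collect_main` for `_Py_GC_REASON_HEAP`, and `_Py_RunGC` always collects generation 0. A quick worked example under the defaults above (assuming no other collection triggers): with `threshold = 2000` and 1,000,000 long-lived objects, no automatic collection is scheduled until more than 250,000 (= 1,000,000 / 4) new objects have been linked, so each pass over the ever-larger heap runs proportionally less often and total collection cost stays roughly linear in the number of objects allocated.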