17
17
18
18
#include "pydtrace.h"
19
19
20
+ // Platform-specific includes for get_current_rss().
21
+ #ifdef _WIN32
22
+ #include <windows.h>
23
+ #include <psapi.h> // For GetProcessMemoryInfo
24
+ #elif defined(__linux__ )
25
+ #include <unistd.h> // For sysconf, getpid
26
+ //#include <errno.h>
27
+ // #include <fcntl.h> // Only if using open/read directly
28
+ #elif defined(__APPLE__ )
29
+ // macOS (Darwin)
30
+ #include <mach/mach.h>
31
+ #include <unistd.h> // For sysconf, getpid
32
+ #elif defined(__FreeBSD__ )
33
+ // FreeBSD
34
+ #include <sys/types.h>
35
+ #include <sys/sysctl.h>
36
+ #include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
37
+ #include <kvm.h>
38
+ #include <unistd.h> // For sysconf, getpid
39
+ #include <fcntl.h> // For O_RDONLY
40
+ #include <limits.h> // For _POSIX2_LINE_MAX
41
+ #elif defined(__OpenBSD__ )
42
+ // OpenBSD
43
+ #include <sys/types.h>
44
+ #include <sys/sysctl.h>
45
+ #include <sys/user.h> // For kinfo_proc
46
+ #include <unistd.h> // For sysconf, getpid
47
+ //#include <errno.h>
48
+ #endif
20
49
21
50
// enable the "mark alive" pass of GC
22
51
#define GC_ENABLE_MARK_ALIVE 1
@@ -1878,6 +1907,172 @@ cleanup_worklist(struct worklist *worklist)
1878
1907
}
1879
1908
}
1880
1909
1910
+ // Return the current resident set size (RSS) of the process, in units of KB.
1911
+ // Returns -1 if this operation is not supported or on failure.
1912
+ static Py_ssize_t
1913
+ get_current_rss (void )
1914
+ {
1915
+ #ifdef _WIN32
1916
+ // Windows implementation using GetProcessMemoryInfo
1917
+ PROCESS_MEMORY_COUNTERS pmc ;
1918
+ HANDLE hProcess = GetCurrentProcess ();
1919
+ if (NULL == hProcess ) {
1920
+ // Should not happen for the current process
1921
+ return -1 ;
1922
+ }
1923
+
1924
+ // GetProcessMemoryInfo returns non-zero on success
1925
+ if (GetProcessMemoryInfo (hProcess ,& pmc ,sizeof (pmc ))) {
1926
+ // pmc.WorkingSetSize is in bytes. Convert to KB.
1927
+ return (Py_ssize_t )(pmc .WorkingSetSize /1024 );
1928
+ }else {
1929
+ CloseHandle (hProcess );
1930
+ return -1 ;
1931
+ }
1932
+
1933
+ #elif __linux__
1934
+ // Linux implementation using /proc/self/statm
1935
+ long page_size_bytes = sysconf (_SC_PAGE_SIZE );
1936
+ if (page_size_bytes <=0 ) {
1937
+ return -1 ;
1938
+ }
1939
+
1940
+ FILE * fp = fopen ("/proc/self/statm" ,"r" );
1941
+ if (fp == NULL ) {
1942
+ return -1 ;
1943
+ }
1944
+
1945
+ // Second number is resident size in pages
1946
+ long rss_pages ;
1947
+ if (fscanf (fp ,"%*d %ld" ,& rss_pages )!= 1 ) {
1948
+ fclose (fp );
1949
+ return -1 ;
1950
+ }
1951
+ fclose (fp );
1952
+
1953
+ // Convert unit to KB
1954
+ return (Py_ssize_t )rss_pages * (page_size_bytes /1024 );
1955
+
1956
+ #elif defined(__APPLE__ )
1957
+ // --- MacOS (Darwin) ---
1958
+ mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT ;
1959
+ mach_task_basic_info_data_t info ;
1960
+ kern_return_t kerr ;
1961
+
1962
+ kerr = task_info (mach_task_self (),MACH_TASK_BASIC_INFO , (task_info_t )& info ,& count );
1963
+ if (kerr != KERN_SUCCESS ) {
1964
+ return -1 ;
1965
+ }
1966
+ // info.resident_size is in bytes. Convert to KB.
1967
+ return (Py_ssize_t )(info .resident_size /1024 );
1968
+
1969
+ #elif defined(__FreeBSD__ )
1970
+ long page_size_kb = sysconf (_SC_PAGESIZE ) /1024 ;
1971
+ if (page_size_kb <=0 ) {
1972
+ return -1 ;
1973
+ }
1974
+
1975
+ // Using /dev/null for vmcore avoids needing dump file.
1976
+ // NULL for kernel file uses running kernel.
1977
+ char errbuf [_POSIX2_LINE_MAX ];// For kvm error messages
1978
+ kvm_t * kd = kvm_openfiles (NULL ,"/dev/null" ,NULL ,O_RDONLY ,errbuf );
1979
+ if (kd == NULL ) {
1980
+ return -1 ;
1981
+ }
1982
+
1983
+ // KERN_PROC_PID filters for the specific process ID
1984
+ // n_procs will contain the number of processes returned (should be 1 or 0)
1985
+ pid_t pid = getpid ();
1986
+ int n_procs ;
1987
+ struct kinfo_proc * kp = kvm_getprocs (kd ,KERN_PROC_PID ,pid ,& n_procs );
1988
+ if (kp == NULL ) {
1989
+ kvm_close (kd );
1990
+ return -1 ;
1991
+ }
1992
+
1993
+ Py_ssize_t rss_kb = -1 ;
1994
+ if (n_procs > 0 ) {
1995
+ // kp[0] contains the info for our process
1996
+ // ki_rssize is in pages. Convert to KB.
1997
+ rss_kb = (Py_ssize_t )kp -> ki_rssize * page_size_kb ;
1998
+ }else {
1999
+ // Process with PID not found, shouldn't happen for self.
2000
+ rss_kb = -1 ;
2001
+ }
2002
+
2003
+ kvm_close (kd );
2004
+ return rss_kb ;
2005
+
2006
+ #elif defined(__OpenBSD__ )
2007
+ long page_size_kb = sysconf (_SC_PAGESIZE ) /1024 ;
2008
+ if (page_size_kb <=0 ) {
2009
+ return -1 ;
2010
+ }
2011
+
2012
+ struct kinfo_proc kp ;
2013
+ pid_t pid = getpid ();
2014
+ int mib [6 ];
2015
+ size_t len = sizeof (kp );
2016
+
2017
+ mib [0 ]= CTL_KERN ;
2018
+ mib [1 ]= KERN_PROC ;
2019
+ mib [2 ]= KERN_PROC_PID ;
2020
+ mib [3 ]= pid ;
2021
+ mib [4 ]= sizeof (struct kinfo_proc );// size of the structure we want
2022
+ mib [5 ]= 1 ;// want 1 structure back
2023
+ if (sysctl (mib ,6 ,& kp ,& len ,NULL ,0 )== -1 ) {
2024
+ return -1 ;
2025
+ }
2026
+
2027
+ if (len > 0 ) {
2028
+ // p_vm_rssize is in pages on OpenBSD. Convert to KB.
2029
+ return (Py_ssize_t )kp .p_vm_rssize * page_size_kb ;
2030
+ }else {
2031
+ // Process info not returned
2032
+ return -1 ;
2033
+ }
2034
+ #else
2035
+ // Unsupported platform
2036
+ return -1 ;
2037
+ #endif
2038
+ }
2039
+
2040
+ static bool
2041
+ gc_should_collect_rss (GCState * gcstate )
2042
+ {
2043
+ Py_ssize_t rss = get_current_rss ();
2044
+ if (rss < 0 ) {
2045
+ // Reading RSS is not support or failed.
2046
+ return true;
2047
+ }
2048
+ int threshold = gcstate -> young .threshold ;
2049
+ if (gcstate -> deferred_count > threshold * 40 ) {
2050
+ // Too many new container objects since last GC, even though RSS
2051
+ // might not have increased much. This is intended to avoid resource
2052
+ // exhaustion if some objects consume resources but don't result in a
2053
+ // RSS increase. We use 40x as the factor here because older versions
2054
+ // of Python would do full collections after roughly every 70,000 new
2055
+ // container objects.
2056
+ return true;
2057
+ }
2058
+ Py_ssize_t last_rss = gcstate -> last_rss ;
2059
+ Py_ssize_t rss_threshold = Py_MAX (last_rss /10 ,128 );
2060
+ if ((rss - last_rss )> rss_threshold ) {
2061
+ // The RSS has increased too much, do a collection.
2062
+ return true;
2063
+ }
2064
+ else {
2065
+ // The RSS has not increased enough, defer the collection and clear
2066
+ // the young object count so we don't check RSS again on the next call
2067
+ // to gc_should_collect().
2068
+ Py_BEGIN_CRITICAL_SECTION_MUT (& gcstate -> mutex );
2069
+ gcstate -> deferred_count += gcstate -> young .count ;
2070
+ gcstate -> young .count = 0 ;
2071
+ Py_END_CRITICAL_SECTION ();
2072
+ return false;
2073
+ }
2074
+ }
2075
+
1881
2076
static bool
1882
2077
gc_should_collect (GCState * gcstate )
1883
2078
{
@@ -1887,11 +2082,17 @@ gc_should_collect(GCState *gcstate)
1887
2082
if (count <=threshold || threshold == 0 || !gc_enabled ) {
1888
2083
return false;
1889
2084
}
1890
- // Avoid quadratic behavior by scaling threshold to the number of live
1891
- // objects. A few tests rely on immediate scheduling of the GC so we ignore
1892
- // the scaled threshold if generations[1].threshold is set to zero.
1893
- return (count > gcstate -> long_lived_total /4 ||
1894
- gcstate -> old [0 ].threshold == 0 );
2085
+ if (gcstate -> old [0 ].threshold == 0 ) {
2086
+ // A few tests rely on immediate scheduling of the GC so we ignore the
2087
+ // extra conditions if generations[1].threshold is set to zero.
2088
+ return true;
2089
+ }
2090
+ if (count < gcstate -> long_lived_total /4 ) {
2091
+ // Avoid quadratic behavior by scaling threshold to the number of live
2092
+ // objects.
2093
+ return false;
2094
+ }
2095
+ return gc_should_collect_rss (gcstate );
1895
2096
}
1896
2097
1897
2098
static void
@@ -1940,6 +2141,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
1940
2141
}
1941
2142
1942
2143
state -> gcstate -> young .count = 0 ;
2144
+ state -> gcstate -> deferred_count = 0 ;
1943
2145
for (int i = 1 ;i <=generation ;++ i ) {
1944
2146
state -> gcstate -> old [i - 1 ].count = 0 ;
1945
2147
}
@@ -2033,6 +2235,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
2033
2235
// to be freed.
2034
2236
delete_garbage (state );
2035
2237
2238
+ // Store the current RSS, possibly smaller now that we deleted garbage.
2239
+ state -> gcstate -> last_rss = get_current_rss ();
2240
+
2036
2241
// Append objects with legacy finalizers to the "gc.garbage" list.
2037
2242
handle_legacy_finalizers (state );
2038
2243
}