17
17
18
18
#include "pydtrace.h"
19
19
20
+ // Platform-specific includes for get_current_rss().
21
+ #ifdef _WIN32
22
+ #include <windows.h>
23
+ #include <psapi.h> // For GetProcessMemoryInfo
24
+ #elif defined(__linux__ )
25
+ #include <unistd.h> // For sysconf, getpid
26
+ #elif defined(__APPLE__ )
27
+ #include <mach/mach.h>
28
+ #include <unistd.h> // For sysconf, getpid
29
+ #elif defined(__FreeBSD__ )
30
+ #include <sys/types.h>
31
+ #include <sys/sysctl.h>
32
+ #include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
33
+ #include <kvm.h>
34
+ #include <unistd.h> // For sysconf, getpid
35
+ #include <fcntl.h> // For O_RDONLY
36
+ #include <limits.h> // For _POSIX2_LINE_MAX
37
+ #elif defined(__OpenBSD__ )
38
+ #include <sys/types.h>
39
+ #include <sys/sysctl.h>
40
+ #include <sys/user.h> // For kinfo_proc
41
+ #include <unistd.h> // For sysconf, getpid
42
+ #endif
20
43
21
44
// enable the "mark alive" pass of GC
22
45
#define GC_ENABLE_MARK_ALIVE 1
@@ -1878,6 +1901,180 @@ cleanup_worklist(struct worklist *worklist)
1878
1901
}
1879
1902
}
1880
1903
1904
+ // Return the current resident set size (RSS) of the process, in units of KB.
1905
+ // Returns -1 if this operation is not supported or on failure.
1906
+ static Py_ssize_t
1907
+ get_current_rss (void )
1908
+ {
1909
+ #ifdef _WIN32
1910
+ // Windows implementation using GetProcessMemoryInfo
1911
+ PROCESS_MEMORY_COUNTERS pmc ;
1912
+ HANDLE hProcess = GetCurrentProcess ();
1913
+ if (NULL == hProcess ) {
1914
+ // Should not happen for the current process
1915
+ return -1 ;
1916
+ }
1917
+
1918
+ // GetProcessMemoryInfo returns non-zero on success
1919
+ if (GetProcessMemoryInfo (hProcess ,& pmc ,sizeof (pmc ))) {
1920
+ // pmc.WorkingSetSize is in bytes. Convert to KB.
1921
+ return (Py_ssize_t )(pmc .WorkingSetSize /1024 );
1922
+ }
1923
+ else {
1924
+ return -1 ;
1925
+ }
1926
+
1927
+ #elif __linux__
1928
+ // Linux implementation using /proc/self/statm
1929
+ long page_size_bytes = sysconf (_SC_PAGE_SIZE );
1930
+ if (page_size_bytes <=0 ) {
1931
+ return -1 ;
1932
+ }
1933
+
1934
+ FILE * fp = fopen ("/proc/self/statm" ,"r" );
1935
+ if (fp == NULL ) {
1936
+ return -1 ;
1937
+ }
1938
+
1939
+ // Second number is resident size in pages
1940
+ long rss_pages ;
1941
+ if (fscanf (fp ,"%*d %ld" ,& rss_pages )!= 1 ) {
1942
+ fclose (fp );
1943
+ return -1 ;
1944
+ }
1945
+ fclose (fp );
1946
+
1947
+ // Sanity check
1948
+ if (rss_pages < 0 || rss_pages > 1000000000 ) {
1949
+ return -1 ;
1950
+ }
1951
+
1952
+ // Convert unit to KB
1953
+ return (Py_ssize_t )rss_pages * (page_size_bytes /1024 );
1954
+
1955
+ #elif defined(__APPLE__ )
1956
+ // --- MacOS (Darwin) ---
1957
+ mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT ;
1958
+ mach_task_basic_info_data_t info ;
1959
+ kern_return_t kerr ;
1960
+
1961
+ kerr = task_info (mach_task_self (),MACH_TASK_BASIC_INFO , (task_info_t )& info ,& count );
1962
+ if (kerr != KERN_SUCCESS ) {
1963
+ return -1 ;
1964
+ }
1965
+ // info.resident_size is in bytes. Convert to KB.
1966
+ return (Py_ssize_t )(info .resident_size /1024 );
1967
+
1968
+ #elif defined(__FreeBSD__ )
1969
+ long page_size_kb = sysconf (_SC_PAGESIZE ) /1024 ;
1970
+ if (page_size_kb <=0 ) {
1971
+ return -1 ;
1972
+ }
1973
+
1974
+ // Using /dev/null for vmcore avoids needing dump file.
1975
+ // NULL for kernel file uses running kernel.
1976
+ char errbuf [_POSIX2_LINE_MAX ];// For kvm error messages
1977
+ kvm_t * kd = kvm_openfiles (NULL ,"/dev/null" ,NULL ,O_RDONLY ,errbuf );
1978
+ if (kd == NULL ) {
1979
+ return -1 ;
1980
+ }
1981
+
1982
+ // KERN_PROC_PID filters for the specific process ID
1983
+ // n_procs will contain the number of processes returned (should be 1 or 0)
1984
+ pid_t pid = getpid ();
1985
+ int n_procs ;
1986
+ struct kinfo_proc * kp = kvm_getprocs (kd ,KERN_PROC_PID ,pid ,& n_procs );
1987
+ if (kp == NULL ) {
1988
+ kvm_close (kd );
1989
+ return -1 ;
1990
+ }
1991
+
1992
+ Py_ssize_t rss_kb = -1 ;
1993
+ if (n_procs > 0 ) {
1994
+ // kp[0] contains the info for our process
1995
+ // ki_rssize is in pages. Convert to KB.
1996
+ rss_kb = (Py_ssize_t )kp -> ki_rssize * page_size_kb ;
1997
+ }
1998
+ else {
1999
+ // Process with PID not found, shouldn't happen for self.
2000
+ rss_kb = -1 ;
2001
+ }
2002
+
2003
+ kvm_close (kd );
2004
+ return rss_kb ;
2005
+
2006
+ #elif defined(__OpenBSD__ )
2007
+ long page_size_kb = sysconf (_SC_PAGESIZE ) /1024 ;
2008
+ if (page_size_kb <=0 ) {
2009
+ return -1 ;
2010
+ }
2011
+
2012
+ struct kinfo_proc kp ;
2013
+ pid_t pid = getpid ();
2014
+ int mib [6 ];
2015
+ size_t len = sizeof (kp );
2016
+
2017
+ mib [0 ]= CTL_KERN ;
2018
+ mib [1 ]= KERN_PROC ;
2019
+ mib [2 ]= KERN_PROC_PID ;
2020
+ mib [3 ]= pid ;
2021
+ mib [4 ]= sizeof (struct kinfo_proc );// size of the structure we want
2022
+ mib [5 ]= 1 ;// want 1 structure back
2023
+ if (sysctl (mib ,6 ,& kp ,& len ,NULL ,0 )== -1 ) {
2024
+ return -1 ;
2025
+ }
2026
+
2027
+ if (len > 0 ) {
2028
+ // p_vm_rssize is in pages on OpenBSD. Convert to KB.
2029
+ return (Py_ssize_t )kp .p_vm_rssize * page_size_kb ;
2030
+ }
2031
+ else {
2032
+ // Process info not returned
2033
+ return -1 ;
2034
+ }
2035
+ #else
2036
+ // Unsupported platform
2037
+ return -1 ;
2038
+ #endif
2039
+ }
2040
+
2041
+ static bool
2042
+ gc_should_collect_rss (GCState * gcstate )
2043
+ {
2044
+ Py_ssize_t rss = get_current_rss ();
2045
+ if (rss < 0 ) {
2046
+ // Reading RSS is not support or failed.
2047
+ return true;
2048
+ }
2049
+ int threshold = gcstate -> young .threshold ;
2050
+ Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed (& gcstate -> deferred_count );
2051
+ if (deferred > threshold * 40 ) {
2052
+ // Too many new container objects since last GC, even though RSS
2053
+ // might not have increased much. This is intended to avoid resource
2054
+ // exhaustion if some objects consume resources but don't result in a
2055
+ // RSS increase. We use 40x as the factor here because older versions
2056
+ // of Python would do full collections after roughly every 70,000 new
2057
+ // container objects.
2058
+ return true;
2059
+ }
2060
+ Py_ssize_t last_rss = gcstate -> last_rss ;
2061
+ Py_ssize_t rss_threshold = Py_MAX (last_rss /10 ,128 );
2062
+ if ((rss - last_rss )> rss_threshold ) {
2063
+ // The RSS has increased too much, do a collection.
2064
+ return true;
2065
+ }
2066
+ else {
2067
+ // The RSS has not increased enough, defer the collection and clear
2068
+ // the young object count so we don't check RSS again on the next call
2069
+ // to gc_should_collect().
2070
+ PyMutex_Lock (& gcstate -> mutex );
2071
+ gcstate -> deferred_count += gcstate -> young .count ;
2072
+ gcstate -> young .count = 0 ;
2073
+ PyMutex_Unlock (& gcstate -> mutex );
2074
+ return false;
2075
+ }
2076
+ }
2077
+
1881
2078
static bool
1882
2079
gc_should_collect (GCState * gcstate )
1883
2080
{
@@ -1887,11 +2084,17 @@ gc_should_collect(GCState *gcstate)
1887
2084
if (count <=threshold || threshold == 0 || !gc_enabled ) {
1888
2085
return false;
1889
2086
}
1890
- // Avoid quadratic behavior by scaling threshold to the number of live
1891
- // objects. A few tests rely on immediate scheduling of the GC so we ignore
1892
- // the scaled threshold if generations[1].threshold is set to zero.
1893
- return (count > gcstate -> long_lived_total /4 ||
1894
- gcstate -> old [0 ].threshold == 0 );
2087
+ if (gcstate -> old [0 ].threshold == 0 ) {
2088
+ // A few tests rely on immediate scheduling of the GC so we ignore the
2089
+ // extra conditions if generations[1].threshold is set to zero.
2090
+ return true;
2091
+ }
2092
+ if (count < gcstate -> long_lived_total /4 ) {
2093
+ // Avoid quadratic behavior by scaling threshold to the number of live
2094
+ // objects.
2095
+ return false;
2096
+ }
2097
+ return gc_should_collect_rss (gcstate );
1895
2098
}
1896
2099
1897
2100
static void
@@ -1940,6 +2143,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
1940
2143
}
1941
2144
1942
2145
state -> gcstate -> young .count = 0 ;
2146
+ state -> gcstate -> deferred_count = 0 ;
1943
2147
for (int i = 1 ;i <=generation ;++ i ) {
1944
2148
state -> gcstate -> old [i - 1 ].count = 0 ;
1945
2149
}
@@ -2033,6 +2237,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
2033
2237
// to be freed.
2034
2238
delete_garbage (state );
2035
2239
2240
+ // Store the current RSS, possibly smaller now that we deleted garbage.
2241
+ state -> gcstate -> last_rss = get_current_rss ();
2242
+
2036
2243
// Append objects with legacy finalizers to the "gc.garbage" list.
2037
2244
handle_legacy_finalizers (state );
2038
2245
}