Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025
diff --git a/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst
 Fix several crashes due to reading invalid memory in the new Tachyon
 sampling profiler. Patch by Pablo Galindo.
diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h
 #define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
 #define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)

 /* Maximum sizes for validation to prevent buffer overflows from corrupted data */
 #define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024)  /* 16 MB max for stack chunks */
 #define MAX_LONG_DIGITS 64  /* Allows values up to ~2^1920 */
 #define MAX_SET_TABLE_SIZE (1 << 20)  /* 1 million entries max for set iteration */

 #ifndef MAX
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #endif
 extern bool parse_linetable(
    const uintptr_t addrq,
    const char* linetable,
    Py_ssize_t linetable_size,
    int firstlineno,
    LocationInfo* info
 );
diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c
    }

    Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used);
    Py_ssize_tset_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1;
    Py_ssize_tmask = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask);
    uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table);

    // Validate mask and num_els to prevent huge loop iterations from garbage data
    if (mask < 0 || mask >= MAX_SET_TABLE_SIZE || num_els < 0 || num_els > mask + 1) {
        set_exception_cause(unwinder, PyExc_RuntimeError,
            "Invalid set object (corrupted remote memory)");
        return -1;
    }
    Py_ssize_t set_len = mask + 1;

    Py_ssize_t i = 0;
    Py_ssize_t els = 0;
    while (i < set_len && els < num_els) {
            return -1;
        }

        if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) {
        uintptr_t next_node = GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next);
        if (next_node == 0) {
            PyErr_SetString(PyExc_RuntimeError,
                           "Invalid linked list structure reading remote memory");
            set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list");
            return -1;
        }

        uintptr_t task_addr =(uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next)
        uintptr_t task_addr =next_node
            - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node;

        if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) {
        // Read next node
        if (_Py_RemoteDebug_PagedReadRemoteMemory(
                &unwinder->handle,
 (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next),
 next_node,
                sizeof(task_node),
                task_node) < 0) {
            set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by");
diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c
 * LINE TABLE PARSING FUNCTIONS
 * ============================================================================ */

 // Inline helper for bounds-checked byte reading (no function call overhead)
 static inline int
 read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out)
 {
    if (*ptr >= end) {
        return -1;
    }
    *out = *(*ptr)++;
    return 0;
 }

 static int
 scan_varint(const uint8_t **ptr)
 scan_varint(const uint8_t **ptr, const uint8_t *end)
 {
    unsigned int read = **ptr;
    *ptr = *ptr + 1;
    uint8_t read;
    if (read_byte(ptr, end, &read) < 0) {
        return -1;
    }
    unsigned int val = read & 63;
    unsigned int shift = 0;
    while (read & 64) {
        read = **ptr;
        *ptr = *ptr + 1;
        if (read_byte(ptr, end, &read) < 0) {
            return -1;
        }
        shift += 6;
        // Prevent infinite loop on malformed data (shift overflow)
        if (shift > 28) {
            return -1;
        }
        val |= (read & 63) << shift;
    }
    return val;
    return(int)val;
 }

 static int
 scan_signed_varint(const uint8_t **ptr)
 scan_signed_varint(const uint8_t **ptr, const uint8_t *end)
 {
    unsigned int uval = scan_varint(ptr);
    int uval = scan_varint(ptr, end);
    if (uval < 0) {
        return INT_MIN;  // Error sentinel (valid signed varints won't be INT_MIN)
    }
    if (uval & 1) {
        return -(int)(uval >> 1);
        return -(int)((unsigned int)uval >> 1);
    }
    else {
        return uval >> 1;
        return(int)((unsigned int)uval >> 1);
    }
 }

 bool
 parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
 parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size,
                int firstlineno, LocationInfo* info)
 {
    // Reject garbage: zero or negative size
    if (linetable_size <= 0) {
        return false;
    }

    const uint8_t* ptr = (const uint8_t*)(linetable);
    const uint8_t* end = ptr + linetable_size;
    uintptr_t addr = 0;
    int computed_line = firstlineno;  // Running accumulator, separate from output
    int val;  // Temporary for varint results
    uint8_t byte;  // Temporary for byte reads
    const size_t MAX_LINETABLE_ENTRIES = 65536;
    size_t entry_count = 0;

    while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
    while (ptr < end &&*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
        entry_count++;
        uint8_t first_byte = *(ptr++);
        uint8_t code = (first_byte >> 3) & 15;
                info->column = info->end_column = -1;
                break;
            case PY_CODE_LOCATION_INFO_LONG:
                computed_line += scan_signed_varint(&ptr);
                val = scan_signed_varint(&ptr, end);
                if (val == INT_MIN) {
                    return false;
                }
                computed_line += val;
                info->lineno = computed_line;
                info->end_lineno = computed_line + scan_varint(&ptr);
                info->column = scan_varint(&ptr) - 1;
                info->end_column = scan_varint(&ptr) - 1;
                val = scan_varint(&ptr, end);
                if (val < 0) {
                    return false;
                }
                info->end_lineno = computed_line + val;
                val = scan_varint(&ptr, end);
                if (val < 0) {
                    return false;
                }
                info->column = val - 1;
                val = scan_varint(&ptr, end);
                if (val < 0) {
                    return false;
                }
                info->end_column = val - 1;
                break;
            case PY_CODE_LOCATION_INFO_NO_COLUMNS:
                computed_line += scan_signed_varint(&ptr);
                val = scan_signed_varint(&ptr, end);
                if (val == INT_MIN) {
                    return false;
                }
                computed_line += val;
                info->lineno = info->end_lineno = computed_line;
                info->column = info->end_column = -1;
                break;
            case PY_CODE_LOCATION_INFO_ONE_LINE2:
                computed_line += code - 10;
                info->lineno = info->end_lineno = computed_line;
                info->column = *(ptr++);
                info->end_column = *(ptr++);
                if (read_byte(&ptr, end, &byte) < 0) {
                    return false;
                }
                info->column = byte;
                if (read_byte(&ptr, end, &byte) < 0) {
                    return false;
                }
                info->end_column = byte;
                break;
            default: {
                uint8_t second_byte = *(ptr++);
                if ((second_byte & 128) != 0) {
                if (read_byte(&ptr, end, &byte) < 0) {
                    return false;
                }
                if ((byte & 128) != 0) {
                    return false;
                }
                info->lineno = info->end_lineno = computed_line;
                info->column = code << 3 | (second_byte >> 4);
                info->end_column = info->column + (second_byte & 15);
                info->column = code << 3 | (byte >> 4);
                info->end_column = info->column + (byte & 15);
                break;
            }
        }
        tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
    }

    if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
        assert(ctx->tlbc_index >= 0);
    // Validate tlbc_index and check TLBC cache
    if (tlbc_entry) {
        // Validate index bounds (also catches negative values since tlbc_index is signed)
        if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) {
            set_exception_cause(unwinder, PyExc_RuntimeError,
                "Invalid tlbc_index (corrupted remote memory)");
            goto error;
        }
        assert(tlbc_entry->tlbc_array_size > 0);
        // Use cached TLBC data
        uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
        }
    }

    // Fall back to main bytecode
    // Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
    addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;

 done_tlbc:
    ;  // Empty statement to avoid C23 extension warning
    LocationInfo info = {0};
    bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
                              PyBytes_GET_SIZE(meta->linetable),
                              meta->first_lineno, &info);
    if (!ok) {
        info.lineno = -1;
diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c
    // Check actual size and reread if necessary
    size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size));
    if (actual_size != current_size) {
        // Validate size: reject garbage (too small or unreasonably large)
        // Size must be at least enough for the header and reasonably bounded
        if (actual_size <= offsetof(_PyStackChunk, data) || actual_size > MAX_STACK_CHUNK_SIZE) {
            PyMem_RawFree(this_chunk);
            set_exception_cause(unwinder, PyExc_RuntimeError,
                "Invalid stack chunk size (corrupted remote memory)");
            return -1;
        }

        this_chunk = PyMem_RawRealloc(this_chunk, actual_size);
        if (!this_chunk) {
            PyErr_NoMemory();
 find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
 {
    for (size_t i = 0; i < chunks->count; ++i) {
        assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data));
        // Validate size: reject garbage that would cause underflow
        if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) {
            // Skip this chunk - corrupted size from remote memory
            continue;
        }
        uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
        size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);

diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c
        }

        while (current_tstate != 0) {
            uintptr_t prev_tstate = current_tstate;
            PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate,
                                                           gil_holder_tstate,
                                                           gc_frame);
            if (!frame_info) {
                // Check if this was an intentional skip due to mode-based filtering
                if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL ||
                     self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) {
                    // Detect cycle: if current_tstate didn't advance, we have corrupted data
                    if (current_tstate == prev_tstate) {
                        Py_DECREF(interpreter_threads);
                        set_exception_cause(self, PyExc_RuntimeError,
                            "Thread list cycle detected (corrupted remote memory)");
                        Py_CLEAR(result);
                        goto exit;
                    }
                    // Thread was skipped due to mode filtering, continue to next thread
                    continue;
                }
diff --git a/Modules/_remote_debugging/object_reading.c b/Modules/_remote_debugging/object_reading.c
        return 0;
    }

    // If the long object has inline digits, use them directly
    // Validate size: reject garbage (negative or unreasonably large)
    if (size < 0 || size > MAX_LONG_DIGITS) {
        set_exception_cause(unwinder, PyExc_RuntimeError,
            "Invalid PyLong size (corrupted remote memory)");
        return -1;
    }

    // Calculate how many digits fit inline in our local buffer
    Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit;
    Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset;
    Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit);

    // If the long object has inline digits that fit in our buffer, use them directly
    digit *digits;
    if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
    if (size <=max_inline_digits && size <=_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
        // For small integers, digits are inline in the long_value.ob_digit array
        digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
        if (!digits) {
            PyErr_NoMemory();
            set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong");
            return -1;
        }
        memcpy(digits, long_obj +unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit));
        memcpy(digits, long_obj +ob_digit_offset, size * sizeof(digit));
    } else {
        // For larger integers, we need to read the digits separately
        digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Fix several crashes due to reading invalid memory in the new Tachyon
		sampling profiler. Patch by Pablo Galindo.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -140,6 +140,11 @@ typedef enum _WIN32_THREADSTATE {
		#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
		#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)

		/* Maximum sizes for validation to prevent buffer overflows from corrupted data */
		#define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */
		#define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */
		#define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */

		#ifndef MAX
		#define MAX(a, b) ((a) > (b) ? (a) : (b))
		#endif
Expand DownExpand Up		@@ -451,6 +456,7 @@ extern PyObject *make_frame_info(
		extern bool parse_linetable(
		const uintptr_t addrq,
		const char* linetable,
		Py_ssize_t linetable_size,
		int firstlineno,
		LocationInfo* info
		);
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -112,9 +112,17 @@ iterate_set_entries(
		}

		Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used);
		Py_ssize_tset_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1;
		Py_ssize_tmask = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask);
		uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table);

		// Validate mask and num_els to prevent huge loop iterations from garbage data
		if (mask < 0 \|\| mask >= MAX_SET_TABLE_SIZE \|\| num_els < 0 \|\| num_els > mask + 1) {
		set_exception_cause(unwinder, PyExc_RuntimeError,
		"Invalid set object (corrupted remote memory)");
		return -1;
		}
		Py_ssize_t set_len = mask + 1;

		Py_ssize_t i = 0;
		Py_ssize_t els = 0;
		while (i < set_len && els < num_els) {
Expand DownExpand Up		@@ -812,14 +820,15 @@ append_awaited_by_for_thread(
		return -1;
		}

		if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) {
		uintptr_t next_node = GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next);
		if (next_node == 0) {
		PyErr_SetString(PyExc_RuntimeError,
		"Invalid linked list structure reading remote memory");
		set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list");
		return -1;
		}

		uintptr_t task_addr =(uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next)
		uintptr_t task_addr =next_node
		- (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node;

		if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) {
Expand All		@@ -830,7 +839,7 @@ append_awaited_by_for_thread(
		// Read next node
		if (_Py_RemoteDebug_PagedReadRemoteMemory(
		&unwinder->handle,
		(uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next),
		next_node,
		sizeof(task_node),
		task_node) < 0) {
		set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by");
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
		* LINE TABLE PARSING FUNCTIONS
		* ============================================================================ */

		// Inline helper for bounds-checked byte reading (no function call overhead)
		static inline int
		read_byte(const uint8_t *ptr, const uint8_t end, uint8_t *out)
		{
		if (*ptr >= end) {
		return -1;
		}
		out = (*ptr)++;
		return 0;
		}

		static int
		scan_varint(const uint8_t **ptr)
		scan_varint(const uint8_t *ptr, const uint8_t end)
		{
		unsigned int read = **ptr;
		ptr = ptr + 1;
		uint8_t read;
		if (read_byte(ptr, end, &read) < 0) {
		return -1;
		}
		unsigned int val = read & 63;
		unsigned int shift = 0;
		while (read & 64) {
		read = **ptr;
		ptr = ptr + 1;
		if (read_byte(ptr, end, &read) < 0) {
		return -1;
		}
		shift += 6;
		// Prevent infinite loop on malformed data (shift overflow)
		if (shift > 28) {
		return -1;
		}
		val \|= (read & 63) << shift;
		}
		return val;
		return(int)val;
		}

		static int
		scan_signed_varint(const uint8_t **ptr)
		scan_signed_varint(const uint8_t *ptr, const uint8_t end)
		{
		unsigned int uval = scan_varint(ptr);
		int uval = scan_varint(ptr, end);
		if (uval < 0) {
		return INT_MIN; // Error sentinel (valid signed varints won't be INT_MIN)
		}
		if (uval & 1) {
		return -(int)(uval >> 1);
		return -(int)((unsigned int)uval >> 1);
		}
		else {
		return uval >> 1;
		return(int)((unsigned int)uval >> 1);
		}
		}

		bool
		parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
		parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size,
		int firstlineno, LocationInfo* info)
		{
		// Reject garbage: zero or negative size
		if (linetable_size <= 0) {
		return false;
		}

		const uint8_t* ptr = (const uint8_t*)(linetable);
		const uint8_t* end = ptr + linetable_size;
		uintptr_t addr = 0;
		int computed_line = firstlineno; // Running accumulator, separate from output
		int val; // Temporary for varint results
		uint8_t byte; // Temporary for byte reads
		const size_t MAX_LINETABLE_ENTRIES = 65536;
		size_t entry_count = 0;

		while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
		while (ptr < end &&*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
		entry_count++;
		uint8_t first_byte = *(ptr++);
		uint8_t code = (first_byte >> 3) & 15;
Expand All		@@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
		info->column = info->end_column = -1;
		break;
		case PY_CODE_LOCATION_INFO_LONG:
		computed_line += scan_signed_varint(&ptr);
		val = scan_signed_varint(&ptr, end);
		if (val == INT_MIN) {
		return false;
		}
		computed_line += val;
		info->lineno = computed_line;
		info->end_lineno = computed_line + scan_varint(&ptr);
		info->column = scan_varint(&ptr) - 1;
		info->end_column = scan_varint(&ptr) - 1;
		val = scan_varint(&ptr, end);
		if (val < 0) {
		return false;
		}
		info->end_lineno = computed_line + val;
		val = scan_varint(&ptr, end);
		if (val < 0) {
		return false;
		}
		info->column = val - 1;
		val = scan_varint(&ptr, end);
		if (val < 0) {
		return false;
		}
		info->end_column = val - 1;
		break;
		case PY_CODE_LOCATION_INFO_NO_COLUMNS:
		computed_line += scan_signed_varint(&ptr);
		val = scan_signed_varint(&ptr, end);
		if (val == INT_MIN) {
		return false;
		}
		computed_line += val;
		info->lineno = info->end_lineno = computed_line;
		info->column = info->end_column = -1;
		break;
Expand All		@@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
		case PY_CODE_LOCATION_INFO_ONE_LINE2:
		computed_line += code - 10;
		info->lineno = info->end_lineno = computed_line;
		info->column = *(ptr++);
		info->end_column = *(ptr++);
		if (read_byte(&ptr, end, &byte) < 0) {
		return false;
		}
		info->column = byte;
		if (read_byte(&ptr, end, &byte) < 0) {
		return false;
		}
		info->end_column = byte;
		break;
		default: {
		uint8_t second_byte = *(ptr++);
		if ((second_byte & 128) != 0) {
		if (read_byte(&ptr, end, &byte) < 0) {
		return false;
		}
		if ((byte & 128) != 0) {
		return false;
		}
		info->lineno = info->end_lineno = computed_line;
		info->column = code << 3 \| (second_byte >> 4);
		info->end_column = info->column + (second_byte & 15);
		info->column = code << 3 \| (byte >> 4);
		info->end_column = info->column + (byte & 15);
		break;
		}
		}
Expand DownExpand Up		@@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder,
		tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
		}

		if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
		assert(ctx->tlbc_index >= 0);
		// Validate tlbc_index and check TLBC cache
		if (tlbc_entry) {
		// Validate index bounds (also catches negative values since tlbc_index is signed)
		if (ctx->tlbc_index < 0 \|\| ctx->tlbc_index >= tlbc_entry->tlbc_array_size) {
		set_exception_cause(unwinder, PyExc_RuntimeError,
		"Invalid tlbc_index (corrupted remote memory)");
		goto error;
		}
		assert(tlbc_entry->tlbc_array_size > 0);
		// Use cached TLBC data
		uintptr_t entries = (uintptr_t )((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
Expand All		@@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
		}
		}

		// Fall back to main bytecode
		// Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
		addrq = (uint16_t )ip - (uint16_t )meta->addr_code_adaptive;

		done_tlbc:
Expand All		@@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
		; // Empty statement to avoid C23 extension warning
		LocationInfo info = {0};
		bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
		PyBytes_GET_SIZE(meta->linetable),
		meta->first_lineno, &info);
		if (!ok) {
		info.lineno = -1;
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -45,6 +45,15 @@ process_single_stack_chunk(
		// Check actual size and reread if necessary
		size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size));
		if (actual_size != current_size) {
		// Validate size: reject garbage (too small or unreasonably large)
		// Size must be at least enough for the header and reasonably bounded
		if (actual_size <= offsetof(_PyStackChunk, data) \|\| actual_size > MAX_STACK_CHUNK_SIZE) {
Copy link Member Fidget-SpinnerDec 26, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I think this is looser than I'd like for`_PyStackChunk`, but whatever. Copy link MemberAuthor pablogsalDec 26, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Do you have any suggestion? Copy link Member Fidget-SpinnerDec 26, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I wish we had a tighter bound, but I'm not sure what would be appropiate for a real world stack frame? So that's why I said whatever :) Copy link MemberAuthor pablogsalDec 26, 2025• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I wish we had a tighter bound, but I'm not sure what would be appropiate for a real world stack frame? But this is for the entire chunk no? Chunks will grow from 16Kb to whatever, we just need to ensure we don't copy too much because we just read a garbage size to copy. Chunks here are just an optimization so if we fail to read the chunks we will fallback to read frame-by-frame (which sucks but it works). Copy link Member Fidget-SpinnerDec 26, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Oh I didn't know you do the second part (fallback to read one by one). I think that's fine then.
		PyMem_RawFree(this_chunk);
		set_exception_cause(unwinder, PyExc_RuntimeError,
		"Invalid stack chunk size (corrupted remote memory)");
		return -1;
		}

		this_chunk = PyMem_RawRealloc(this_chunk, actual_size);
		if (!this_chunk) {
		PyErr_NoMemory();
Expand DownExpand Up		@@ -129,7 +138,11 @@ void *
		find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
		{
		for (size_t i = 0; i < chunks->count; ++i) {
		assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data));
		// Validate size: reject garbage that would cause underflow
		if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) {
		// Skip this chunk - corrupted size from remote memory
		continue;
		}
		uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
		size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -584,13 +584,22 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
		}

		while (current_tstate != 0) {
		uintptr_t prev_tstate = current_tstate;
		PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate,
		gil_holder_tstate,
		gc_frame);
		if (!frame_info) {
		// Check if this was an intentional skip due to mode-based filtering
		if ((self->mode == PROFILING_MODE_CPU \|\| self->mode == PROFILING_MODE_GIL \|\|
		self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) {
		// Detect cycle: if current_tstate didn't advance, we have corrupted data
		if (current_tstate == prev_tstate) {
		Py_DECREF(interpreter_threads);
		set_exception_cause(self, PyExc_RuntimeError,
		"Thread list cycle detected (corrupted remote memory)");
		Py_CLEAR(result);
		goto exit;
		}
		// Thread was skipped due to mode filtering, continue to next thread
		continue;
		}
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -194,17 +194,29 @@ read_py_long(
		return 0;
		}

		// If the long object has inline digits, use them directly
		// Validate size: reject garbage (negative or unreasonably large)
		if (size < 0 \|\| size > MAX_LONG_DIGITS) {
		set_exception_cause(unwinder, PyExc_RuntimeError,
		"Invalid PyLong size (corrupted remote memory)");
		return -1;
		}

		// Calculate how many digits fit inline in our local buffer
		Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit;
		Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset;
		Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit);

		// If the long object has inline digits that fit in our buffer, use them directly
		digit *digits;
		if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
		if (size <=max_inline_digits && size <=_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
		// For small integers, digits are inline in the long_value.ob_digit array
		digits = (digit )PyMem_RawMalloc(size sizeof(digit));
		if (!digits) {
		PyErr_NoMemory();
		set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong");
		return -1;
		}
		memcpy(digits, long_obj +unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit));
		memcpy(digits, long_obj +ob_digit_offset, size * sizeof(digit));
		} else {
		// For larger integers, we need to read the digits separately
		digits = (digit )PyMem_RawMalloc(size sizeof(digit));
Expand Down