Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

gh-140739: Fix crashes from corrupted remote memory#143190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
pablogsal merged 3 commits intopython:mainfrompablogsal:gh-140739
Dec 26, 2025
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
Fix several crashes due to reading invalid memory in the new Tachyon
sampling profiler. Patch by Pablo Galindo.
6 changes: 6 additions & 0 deletionsModules/_remote_debugging/_remote_debugging.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -140,6 +140,11 @@ typedef enum _WIN32_THREADSTATE {
#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)

/* Maximum sizes for validation to prevent buffer overflows from corrupted data */
#define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */
#define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */
#define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */

#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
Expand DownExpand Up@@ -451,6 +456,7 @@ extern PyObject *make_frame_info(
extern bool parse_linetable(
const uintptr_t addrq,
const char* linetable,
Py_ssize_t linetable_size,
int firstlineno,
LocationInfo* info
);
Expand Down
17 changes: 13 additions & 4 deletionsModules/_remote_debugging/asyncio.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -112,9 +112,17 @@ iterate_set_entries(
}

Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used);
Py_ssize_tset_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1;
Py_ssize_tmask = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask);
uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table);

// Validate mask and num_els to prevent huge loop iterations from garbage data
if (mask < 0 || mask >= MAX_SET_TABLE_SIZE || num_els < 0 || num_els > mask + 1) {
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid set object (corrupted remote memory)");
return -1;
}
Py_ssize_t set_len = mask + 1;

Py_ssize_t i = 0;
Py_ssize_t els = 0;
while (i < set_len && els < num_els) {
Expand DownExpand Up@@ -812,14 +820,15 @@ append_awaited_by_for_thread(
return -1;
}

if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) {
uintptr_t next_node = GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next);
if (next_node == 0) {
PyErr_SetString(PyExc_RuntimeError,
"Invalid linked list structure reading remote memory");
set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list");
return -1;
}

uintptr_t task_addr =(uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next)
uintptr_t task_addr =next_node
- (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node;

if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) {
Expand All@@ -830,7 +839,7 @@ append_awaited_by_for_thread(
// Read next node
if (_Py_RemoteDebug_PagedReadRemoteMemory(
&unwinder->handle,
(uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next),
next_node,
sizeof(task_node),
task_node) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by");
Expand Down
117 changes: 91 additions & 26 deletionsModules/_remote_debugging/code_objects.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
* LINE TABLE PARSING FUNCTIONS
* ============================================================================ */

// Inline helper for bounds-checked byte reading (no function call overhead)
static inline int
read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out)
{
if (*ptr >= end) {
return -1;
}
*out = *(*ptr)++;
return 0;
}

static int
scan_varint(const uint8_t **ptr)
scan_varint(const uint8_t **ptr, const uint8_t *end)
{
unsigned int read = **ptr;
*ptr = *ptr + 1;
uint8_t read;
if (read_byte(ptr, end, &read) < 0) {
return -1;
}
unsigned int val = read & 63;
unsigned int shift = 0;
while (read & 64) {
read = **ptr;
*ptr = *ptr + 1;
if (read_byte(ptr, end, &read) < 0) {
return -1;
}
shift += 6;
// Prevent infinite loop on malformed data (shift overflow)
if (shift > 28) {
return -1;
}
val |= (read & 63) << shift;
}
return val;
return(int)val;
}

static int
scan_signed_varint(const uint8_t **ptr)
scan_signed_varint(const uint8_t **ptr, const uint8_t *end)
{
unsigned int uval = scan_varint(ptr);
int uval = scan_varint(ptr, end);
if (uval < 0) {
return INT_MIN; // Error sentinel (valid signed varints won't be INT_MIN)
}
if (uval & 1) {
return -(int)(uval >> 1);
return -(int)((unsigned int)uval >> 1);
}
else {
return uval >> 1;
return(int)((unsigned int)uval >> 1);
}
}

bool
parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size,
int firstlineno, LocationInfo* info)
{
// Reject garbage: zero or negative size
if (linetable_size <= 0) {
return false;
}

const uint8_t* ptr = (const uint8_t*)(linetable);
const uint8_t* end = ptr + linetable_size;
uintptr_t addr = 0;
int computed_line = firstlineno; // Running accumulator, separate from output
int val; // Temporary for varint results
uint8_t byte; // Temporary for byte reads
const size_t MAX_LINETABLE_ENTRIES = 65536;
size_t entry_count = 0;

while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
while (ptr < end &&*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
entry_count++;
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
Expand All@@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
info->column = info->end_column = -1;
break;
case PY_CODE_LOCATION_INFO_LONG:
computed_line += scan_signed_varint(&ptr);
val = scan_signed_varint(&ptr, end);
if (val == INT_MIN) {
return false;
}
computed_line += val;
info->lineno = computed_line;
info->end_lineno = computed_line + scan_varint(&ptr);
info->column = scan_varint(&ptr) - 1;
info->end_column = scan_varint(&ptr) - 1;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->end_lineno = computed_line + val;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->column = val - 1;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->end_column = val - 1;
break;
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
computed_line += scan_signed_varint(&ptr);
val = scan_signed_varint(&ptr, end);
if (val == INT_MIN) {
return false;
}
computed_line += val;
info->lineno = info->end_lineno = computed_line;
info->column = info->end_column = -1;
break;
Expand All@@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
case PY_CODE_LOCATION_INFO_ONE_LINE2:
computed_line += code - 10;
info->lineno = info->end_lineno = computed_line;
info->column = *(ptr++);
info->end_column = *(ptr++);
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
info->column = byte;
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
info->end_column = byte;
break;
default: {
uint8_t second_byte = *(ptr++);
if ((second_byte & 128) != 0) {
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
if ((byte & 128) != 0) {
return false;
}
info->lineno = info->end_lineno = computed_line;
info->column = code << 3 | (second_byte >> 4);
info->end_column = info->column + (second_byte & 15);
info->column = code << 3 | (byte >> 4);
info->end_column = info->column + (byte & 15);
break;
}
}
Expand DownExpand Up@@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder,
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
}

if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
assert(ctx->tlbc_index >= 0);
// Validate tlbc_index and check TLBC cache
if (tlbc_entry) {
// Validate index bounds (also catches negative values since tlbc_index is signed)
if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) {
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid tlbc_index (corrupted remote memory)");
goto error;
}
assert(tlbc_entry->tlbc_array_size > 0);
// Use cached TLBC data
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
Expand All@@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
}
}

// Fall back to main bytecode
// Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;

done_tlbc:
Expand All@@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
; // Empty statement to avoid C23 extension warning
LocationInfo info = {0};
bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
PyBytes_GET_SIZE(meta->linetable),
meta->first_lineno, &info);
if (!ok) {
info.lineno = -1;
Expand Down
15 changes: 14 additions & 1 deletionModules/_remote_debugging/frames.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -45,6 +45,15 @@ process_single_stack_chunk(
// Check actual size and reread if necessary
size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size));
if (actual_size != current_size) {
// Validate size: reject garbage (too small or unreasonably large)
// Size must be at least enough for the header and reasonably bounded
if (actual_size <= offsetof(_PyStackChunk, data) || actual_size > MAX_STACK_CHUNK_SIZE) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I think this is looser than I'd like for_PyStackChunk, but whatever.

Copy link
MemberAuthor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Do you have any suggestion?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I wish we had a tighter bound, but I'm not sure what would be appropiate for a real world stack frame? So that's why I said whatever :)

Copy link
MemberAuthor

@pablogsalpablogsalDec 26, 2025
edited
Loading

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I wish we had a tighter bound, but I'm not sure what would be appropiate for a real world stack frame?

But this is for the entire chunk no? Chunks will grow from 16Kb to whatever, we just need to ensure we don't copy too much because we just read a garbage size to copy. Chunks here are just an optimization so if we fail to read the chunks we will fallback to read frame-by-frame (which sucks but it works).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Oh I didn't know you do the second part (fallback to read one by one). I think that's fine then.

PyMem_RawFree(this_chunk);
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid stack chunk size (corrupted remote memory)");
return -1;
}

this_chunk = PyMem_RawRealloc(this_chunk, actual_size);
if (!this_chunk) {
PyErr_NoMemory();
Expand DownExpand Up@@ -129,7 +138,11 @@ void *
find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
{
for (size_t i = 0; i < chunks->count; ++i) {
assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data));
// Validate size: reject garbage that would cause underflow
if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) {
// Skip this chunk - corrupted size from remote memory
continue;
}
uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);

Expand Down
9 changes: 9 additions & 0 deletionsModules/_remote_debugging/module.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -584,13 +584,22 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
}

while (current_tstate != 0) {
uintptr_t prev_tstate = current_tstate;
PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate,
gil_holder_tstate,
gc_frame);
if (!frame_info) {
// Check if this was an intentional skip due to mode-based filtering
if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL ||
self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) {
// Detect cycle: if current_tstate didn't advance, we have corrupted data
if (current_tstate == prev_tstate) {
Py_DECREF(interpreter_threads);
set_exception_cause(self, PyExc_RuntimeError,
"Thread list cycle detected (corrupted remote memory)");
Py_CLEAR(result);
goto exit;
}
// Thread was skipped due to mode filtering, continue to next thread
continue;
}
Expand Down
18 changes: 15 additions & 3 deletionsModules/_remote_debugging/object_reading.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -194,17 +194,29 @@ read_py_long(
return 0;
}

// If the long object has inline digits, use them directly
// Validate size: reject garbage (negative or unreasonably large)
if (size < 0 || size > MAX_LONG_DIGITS) {
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid PyLong size (corrupted remote memory)");
return -1;
}

// Calculate how many digits fit inline in our local buffer
Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit;
Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset;
Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit);

// If the long object has inline digits that fit in our buffer, use them directly
digit *digits;
if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
if (size <=max_inline_digits && size <=_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
// For small integers, digits are inline in the long_value.ob_digit array
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
if (!digits) {
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong");
return -1;
}
memcpy(digits, long_obj +unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit));
memcpy(digits, long_obj +ob_digit_offset, size * sizeof(digit));
} else {
// For larger integers, we need to read the digits separately
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
Expand Down
Loading

[8]ページ先頭

©2009-2026 Movatter.jp