Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

gh-133672: Allow LOAD_FAST to be optimized to LOAD_FAST_BORROW#133721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
ljfp wants to merge 10 commits into python:main
base:main
Choose a base branch
Loading
from ljfp:fix-issue-133672
Open
Changes from 1 commit
Commits
Show all changes
10 commits
Select commit. Hold shift + click to select a range.
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
PrevPrevious commit
NextNext commit
Improve CFG optimization with new LOAD_FAST_BORROW safety checks.
  • Loading branch information
@ljfp
ljfp committed May 14, 2025
commit 2f0ffdda25a22b0d46a24224ac552392d80e2944
349 changes: 340 additions & 9 deletions in Python/flowgraph.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -71,6 +71,8 @@ typedef struct _PyCfgBasicblock {
unsigned b_cold : 1;
/* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
unsigned b_warm : 1;
/* b_dfs_visited_gen is used by the borrow-safe analysis to mark whether a block has been visited */
int b_dfs_visited_gen;
} basicblock;


Expand All@@ -95,6 +97,8 @@ typedef struct _PyCfgBuilder cfg_builder;
#define LOCATION(LNO, END_LNO, COL, END_COL) \
((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})

/* Generation counter for the borrow-safety DFS: check_borrow_safety_globally()
 * bumps this once per traversal, and is_borrow_safe() compares it against each
 * block's b_dfs_visited_gen to get fresh "visited" marks without clearing them.
 * NOTE(review): file-scope mutable state — presumably CFG optimization runs
 * single-threaded per compilation; confirm this holds under free-threading. */
static int cfg_dfs_generation_counter = 0;

static inline int
is_block_push(cfg_instr *i)
{
Expand DownExpand Up@@ -2701,6 +2705,267 @@ load_fast_push_block(basicblock ***sp, basicblock *target,
}
}

/*
* Recursively determine if a borrowed reference remains safe along all CFG paths.
*
* This function performs a Depth-First Search (DFS) starting from 'current_block'.
* It tracks a specific value that was notionally loaded by a LOAD_FAST_BORROW
* instruction and is currently at 'depth_of_value_on_entry' on the operand stack
* (0 being TOS, -1 if already known to be consumed). The 'original_local_idx'
* is the index of the local variable from which this value was borrowed.
*
* A borrow is considered UNSAFE if, along any execution path before the
* borrowed value is consumed from the stack:
* 1. The 'original_local_idx' (the backing store for the borrow) is overwritten
* (e.g., by STORE_FAST, DELETE_FAST on that local).
* 2. The borrowed value itself, when at the top of the stack (depth 0), is
* consumed by an instruction that stores it into any local variable
* (e.g., STORE_FAST).
* 3. The value is not consumed and the CFG path ends (e.g., end of function
* without consumption) or enters a cycle where it's not consumed.
*
* The function uses 'cfg_dfs_generation_counter' in conjunction with
* 'current_block->b_dfs_visited_gen' to detect cycles within the current
* specific DFS traversal, preventing infinite loops and deeming such cyclic
* paths unsafe if the value isn't consumed within the cycle.
*
* Stack depths are tracked by:
* - 'depth_of_value_on_entry': The depth of the tracked item upon entering 'current_block'.
* - 'current_target_depth': The depth of the item as instructions in 'current_block' are processed.
* - 'depth_before_jump_op_effect': When a jump instruction is encountered, this
* is calculated by simulating all prior instructions in 'current_block' to find
* the item's depth *just before* the jump instruction itself has any stack effect.
* This precise depth is then used to calculate the 'target_entry_depth' for the
* recursive call to the jump's target block.
*
* Args:
* current_block: The basic block to start analysis from for this recursive step.
* depth_of_value_on_entry: The depth of the tracked borrowed value from TOS
* (0 = TOS, -1 if already consumed).
* original_local_idx: The index of the local variable backing the borrow.
* g: The CFG builder.
*
* Returns:
* true if the borrow is safe along all paths from this point, false otherwise.
*/
static bool
is_borrow_safe(
    basicblock *current_block,
    Py_ssize_t depth_of_value_on_entry,
    int original_local_idx,
    cfg_builder *g)
{
    /* Depth -1 means the tracked value has already been consumed on this
       path, so nothing can invalidate the borrow anymore. */
    if (depth_of_value_on_entry == -1) {
        return true;
    }

    /* Cycle detection: revisiting a block within the same DFS generation
       means we looped without consuming the value. Conservatively treat
       such cyclic paths as unsafe. */
    if (current_block->b_dfs_visited_gen == cfg_dfs_generation_counter) {
        return false;
    }
    current_block->b_dfs_visited_gen = cfg_dfs_generation_counter;

    /* Depth of the tracked value from TOS while simulating this block. */
    Py_ssize_t current_target_depth = depth_of_value_on_entry;

    for (int i = 0; i < current_block->b_iused; i++) {
        cfg_instr *instr = &current_block->b_instr[i];
        int opcode = instr->i_opcode;
        int oparg = instr->i_oparg;

        // 1. Check if the original local is killed before the value is consumed.
        //    Overwriting/clearing the backing local invalidates the borrow.
        if ((opcode == STORE_FAST || opcode == DELETE_FAST || opcode == LOAD_FAST_AND_CLEAR) &&
            oparg == original_local_idx) {
            return false;
        }

        // 2. Simulate stack effect and check for consumption or unsafe store.
        //    NOTE(review): effects_noj is used only to detect an error return;
        //    the pop/push counts are re-derived via _PyOpcode_num_popped/pushed.
        stack_effects effects_noj;
        if (get_stack_effects(opcode, oparg, 0, &effects_noj) < 0) {
            return false; // Error in stack effect calculation
        }
        int num_popped = _PyOpcode_num_popped(opcode, oparg);
        int num_pushed = _PyOpcode_num_pushed(opcode, oparg);

        if (current_target_depth < num_popped) {
            // The tracked value is among the operands popped by this opcode.
            // NOTE(review): only STORE_FAST at TOS is treated as an unsafe
            // consumer; presumably compound stores (e.g. STORE_FAST_STORE_FAST)
            // cannot occur at this stage — confirm.
            if (opcode == STORE_FAST && current_target_depth == 0) {
                // Unsafe: borrowed value was at TOS and is being stored into a local.
                return false;
            }
            return true; // Safely consumed by this instruction.
        }
        // Value not consumed, update its depth.
        current_target_depth = current_target_depth - num_popped + num_pushed;

        // 3. Handle branches (jumps are always last in a basic block).
        if (HAS_TARGET(opcode)) {
            if (i != current_block->b_iused - 1) {
                continue; // Skip jumps that are not the last instruction in the block.
            }
            bool safe_on_all_branches = true;

            // Calculate item's depth just before this jump instruction's own
            // stack effect, by re-simulating the instructions preceding it.
            // NOTE(review): this re-walk duplicates the kill/consumption checks
            // already performed above for instructions 0..i-1.
            Py_ssize_t depth_before_jump_op_effect = depth_of_value_on_entry;
            for(int k=0; k < i; ++k) { // Iterate instructions before the current jump
                cfg_instr *prev_instr_in_block = &current_block->b_instr[k];
                // Kill check for intermediate instructions
                if ((prev_instr_in_block->i_opcode == STORE_FAST ||
                     prev_instr_in_block->i_opcode == DELETE_FAST ||
                     prev_instr_in_block->i_opcode == LOAD_FAST_AND_CLEAR) &&
                    prev_instr_in_block->i_oparg == original_local_idx) {
                    return false;
                }
                stack_effects prev_effects;
                if (get_stack_effects(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg, 0, &prev_effects) < 0) {
                    return false;
                }
                int prev_popped_val = _PyOpcode_num_popped(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg);
                if (depth_before_jump_op_effect < prev_popped_val) { // Consumed before jump
                    if (prev_instr_in_block->i_opcode == STORE_FAST && depth_before_jump_op_effect == 0) {
                        return false; // Stored into local
                    }
                    return true; // Safely consumed before the jump
                }
                depth_before_jump_op_effect = depth_before_jump_op_effect - prev_popped_val +
                                              _PyOpcode_num_pushed(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg);
            }

            // Analyze jump target path.
            // NOTE(review): effects_jump likewise serves only as an error check.
            stack_effects effects_jump;
            if (get_stack_effects(opcode, oparg, 1, &effects_jump) < 0) return false;
            int jump_popped_val = _PyOpcode_num_popped(opcode, oparg);
            Py_ssize_t target_entry_depth = depth_before_jump_op_effect;

            if (target_entry_depth < jump_popped_val) { // Consumed by the jump op itself
                if (opcode == STORE_FAST && target_entry_depth == 0) {
                    safe_on_all_branches = false;
                }
                // else: safely consumed by jump operation.
            } else { // Not consumed by jump op, recurse on target.
                target_entry_depth = target_entry_depth - jump_popped_val + _PyOpcode_num_pushed(opcode, oparg);
                if (!is_borrow_safe(instr->i_target, target_entry_depth, original_local_idx, g)) {
                    safe_on_all_branches = false;
                }
            }

            // Analyze fallthrough path for conditional jumps, if the jump path was safe.
            if (safe_on_all_branches && IS_CONDITIONAL_JUMP_OPCODE(opcode) && current_block->b_next) {
                // 'current_target_depth' already reflects the stack state if the jump is *not* taken.
                if (!is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g)) {
                    safe_on_all_branches = false;
                }
            }
            return safe_on_all_branches;
        }
    }

    // When the instructions finish and the value isn't consumed, check fallthrough.
    if (current_block->b_next && BB_HAS_FALLTHROUGH(current_block)) {
        return is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g);
    }

    // No further path (or no fallthrough) and value is still on stack (current_target_depth is valid).
    // This means it's unconsumed at the end of a CFG path.
    return false;
}


/*
* Initiates a Control Flow Graph (CFG) wide safety check for a borrowed reference.
*
* This function is called when a LOAD_FAST instruction is a candidate for
* LOAD_FAST_BORROW, but its produced value ('REF_UNCONSUMED' flag is set)
* might live beyond the current basic block ('producer_block').
*
* It determines the immediate successor(s) of the 'producer_block' and the
* stack depth at which the borrowed value (identified by 'original_local_idx'
* and initially at 'depth_of_value_at_producer_end' from TOS) would enter
* these successors.
*
* It then calls 'is_borrow_safe()' for each successor path. The borrow is
* considered globally safe only if 'is_borrow_safe()' returns true for ALL
* possible successor paths.
*
* A new DFS traversal is started by incrementing 'cfg_dfs_generation_counter'
* to ensure that 'is_borrow_safe()' uses a fresh set of visited markers
* ('b_dfs_visited_gen') for its analysis.
*
* Args:
* producer_block: The basic block containing the LOAD_FAST candidate.
* depth_of_value_at_producer_end: The depth of the candidate borrowed value
* from TOS at the end of 'producer_block'.
* (0 = TOS, -1 if already consumed - though
* this function expects a live value).
* original_local_idx: The index of the local variable backing the borrow.
* g: The CFG builder.
*
* Returns:
* true if the borrow is safe across all subsequent CFG paths, false otherwise.
*/
static bool
check_borrow_safety_globally(
    basicblock *producer_block,
    Py_ssize_t depth_of_value_at_producer_end,
    int original_local_idx,
    cfg_builder *g)
{
    /* Start a fresh DFS generation so is_borrow_safe() sees clean
       "visited" marks without having to clear every block. */
    cfg_dfs_generation_counter++;
    bool overall_safety = true;

    cfg_instr *last_instr = basicblock_last_instr(producer_block);

    // If depth is -1, implies value was already consumed or is invalid for tracking.
    // NOTE(review): the generation counter is bumped even on this early exit;
    // harmless, since each traversal uses its own generation.
    if (depth_of_value_at_producer_end == -1) {
        return false;
    }

    if (last_instr && HAS_TARGET(last_instr->i_opcode)) {
        /* The producer block ends in a jump: simulate the jump's own stack
           effect, then recurse into the target (and fallthrough, if any). */
        // NOTE(review): effects_jump is used only as an error check; the
        // pop/push counts come from _PyOpcode_num_popped/pushed.
        stack_effects effects_jump;
        if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 1, &effects_jump) < 0) return false;
        int jump_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg);
        Py_ssize_t entry_depth_for_target = depth_of_value_at_producer_end;

        if (entry_depth_for_target < jump_popped) { // Consumed by the jump itself.
            if (last_instr->i_opcode == STORE_FAST && entry_depth_for_target == 0) {
                overall_safety = false; // Unsafe store of borrowed value.
            }
            // else: safely consumed.
        } else {
            entry_depth_for_target = entry_depth_for_target - jump_popped +
                                     _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg);
            if (!is_borrow_safe(last_instr->i_target, entry_depth_for_target, original_local_idx, g)) {
                overall_safety = false;
            }
        }

        // Analyze fallthrough path for conditional jumps, if the jump path was safe.
        if (overall_safety && IS_CONDITIONAL_JUMP_OPCODE(last_instr->i_opcode) && producer_block->b_next) {
            stack_effects effects_fall; // Stack effect if jump is NOT taken.
            if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 0, &effects_fall) < 0) return false;
            int fall_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg);
            Py_ssize_t entry_depth_for_fallthrough = depth_of_value_at_producer_end;

            if (entry_depth_for_fallthrough < fall_popped) { // Consumed by not taking jump.
                if (last_instr->i_opcode == STORE_FAST && entry_depth_for_fallthrough == 0) {
                    overall_safety = false; // Unsafe store.
                }
                // else: safely consumed.
            } else { // Not consumed by not taking jump, recurse on fallthrough.
                entry_depth_for_fallthrough = entry_depth_for_fallthrough - fall_popped +
                                              _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg);
                if (!is_borrow_safe(producer_block->b_next, entry_depth_for_fallthrough, original_local_idx, g)) {
                    overall_safety = false;
                }
            }
        }
    } else if (producer_block->b_next && BB_HAS_FALLTHROUGH(producer_block)) { // Standard fallthrough, no jump.
        if (!is_borrow_safe(producer_block->b_next, depth_of_value_at_producer_end, original_local_idx, g)) {
            overall_safety = false;
        }
    } else { // No successors (e.g., block ends in RETURN/RAISE) and value still on stack.
        overall_safety = false; // Unconsumed at end of a CFG path.
    }
    return overall_safety;
}

/*
* Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
* load borrowed references onto the operand stack.
Expand DownExpand Up@@ -2747,6 +3012,7 @@ optimize_load_fast(cfg_builder *g)
basicblock *entryblock = g->g_entryblock;
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
max_instrs = Py_MAX(max_instrs, b->b_iused);
b->b_dfs_visited_gen = 0;
}
size_t instr_flags_size = max_instrs * sizeof(uint8_t);
uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
Expand DownExpand Up@@ -2976,17 +3242,82 @@ optimize_load_fast(cfg_builder *g)

// Optimize instructions
for (int i = 0; i < block->b_iused; i++) {
if (!(instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL))) {
cfg_instr *instr = &block->b_instr[i];
switch (instr->i_opcode) {
case LOAD_FAST:
cfg_instr *instr = &block->b_instr[i];
bool is_load_fast_type = (instr->i_opcode == LOAD_FAST || instr->i_opcode == LOAD_FAST_LOAD_FAST);

if (is_load_fast_type) {
bool killed_or_stored_locally = (instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL));
if (killed_or_stored_locally) {
continue; // Definitely cannot borrow due to local block issues
}

bool unconsumed_in_block = (instr_flags[i] & REF_UNCONSUMED);
bool can_borrow = false;

if (!unconsumed_in_block) {
can_borrow = true; // Safe by local analysis, consumed in current block
} else {
if (instr->i_opcode == LOAD_FAST) {
int local_idx = instr->i_oparg;
Py_ssize_t depth_from_tos_at_block_end = -1;
// Find the specific item on the `refs` stack (at end of current block simulation)
for (Py_ssize_t k = 0; k < refs.size; k++) {
ref r = ref_stack_at(&refs, k);
if (r.instr == i && r.local == local_idx) { // Match instruction and local
depth_from_tos_at_block_end = refs.size - 1 - k;
break;
}
}

if (depth_from_tos_at_block_end != -1) {
can_borrow = check_borrow_safety_globally(block, depth_from_tos_at_block_end, local_idx, g);
} else {
// If REF_UNCONSUMED is set but we couldn't find its depth, assume unsafe.
// This can happen if refs.size is 0, yet flag is set.
can_borrow = false;
}

} else { // LOAD_FAST_LOAD_FAST
can_borrow = true; // Assume true, prove false if any part is unsafe
int local_idx1 = instr->i_oparg >> 4;
int local_idx2 = instr->i_oparg & 15;
Py_ssize_t depth1 = -1, depth2 = -1;

// Find depths on `refs` stack for both products of this LFLF instruction `i`
for (Py_ssize_t k = 0; k < refs.size; k++) {
ref r = ref_stack_at(&refs, k);
if (r.instr == i) {
Py_ssize_t current_depth = refs.size - 1 - k;
if (r.local == local_idx1 && depth1 == -1) depth1 = current_depth;
else if (r.local == local_idx2 && depth2 == -1) depth2 = current_depth;
}
}

bool found_any_on_stack = (depth1 != -1 || depth2 != -1);

if (depth1 != -1) {
if (!check_borrow_safety_globally(block, depth1, local_idx1, g)) {
can_borrow = false;
}
}
if (can_borrow && depth2 != -1) {
if (!check_borrow_safety_globally(block, depth2, local_idx2, g)) {
can_borrow = false;
}
}

if (unconsumed_in_block && !found_any_on_stack) {
can_borrow = false;
}
}
}

if (can_borrow) {
if (instr->i_opcode == LOAD_FAST) {
instr->i_opcode = LOAD_FAST_BORROW;
break;
case LOAD_FAST_LOAD_FAST:
} else if (instr->i_opcode == LOAD_FAST_LOAD_FAST) {
instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
break;
default:
break;
}
}
}
}
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp