Expand Up @@ -71,6 +71,8 @@ typedef struct _PyCfgBasicblock { unsigned b_cold : 1; /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */ unsigned b_warm : 1; /* b_dfs_visited_gen is used by the borrow-safe analysis to mark whether a block has been visited */ int b_dfs_visited_gen; } basicblock; Expand All @@ -95,6 +97,8 @@ typedef struct _PyCfgBuilder cfg_builder; #define LOCATION(LNO, END_LNO, COL, END_COL) \ ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)}) static int cfg_dfs_generation_counter = 0; static inline int is_block_push(cfg_instr *i) { Expand Down Expand Up @@ -2701,6 +2705,267 @@ load_fast_push_block(basicblock ***sp, basicblock *target, } } /* * Recursively determine if a borrowed reference remains safe along all CFG paths. * * This function performs a Depth-First Search (DFS) starting from 'current_block'. * It tracks a specific value that was notionally loaded by a LOAD_FAST_BORROW * instruction and is currently at 'depth_of_value_on_entry' on the operand stack * (0 being TOS, -1 if already known to be consumed). The 'original_local_idx' * is the index of the local variable from which this value was borrowed. * * A borrow is considered UNSAFE if, along any execution path before the * borrowed value is consumed from the stack: * 1. The 'original_local_idx' (the backing store for the borrow) is overwritten * (e.g., by STORE_FAST, DELETE_FAST on that local). * 2. The borrowed value itself, when at the top of the stack (depth 0), is * consumed by an instruction that stores it into any local variable * (e.g., STORE_FAST). * 3. The value is not consumed and the CFG path ends (e.g., end of function * without consumption) or enters a cycle where it's not consumed. * * The function uses 'cfg_dfs_generation_counter' in conjunction with * 'current_block->b_dfs_visited_gen' to detect cycles within the current * specific DFS traversal, preventing infinite loops and deeming such cyclic * paths unsafe if the value isn't consumed within the cycle. * * Stack depths are tracked by: * - 'depth_of_value_on_entry': The depth of the tracked item upon entering 'current_block'. * - 'current_target_depth': The depth of the item as instructions in 'current_block' are processed. * - 'depth_before_jump_op_effect': When a jump instruction is encountered, this * is calculated by simulating all prior instructions in 'current_block' to find * the item's depth *just before* the jump instruction itself has any stack effect. * This precise depth is then used to calculate the 'target_entry_depth' for the * recursive call to the jump's target block. * * Args: * current_block: The basic block to start analysis from for this recursive step. * depth_of_value_on_entry: The depth of the tracked borrowed value from TOS * (0 = TOS, -1 if already consumed). * original_local_idx: The index of the local variable backing the borrow. * g: The CFG builder. * * Returns: * true if the borrow is safe along all paths from this point, false otherwise. */ static bool is_borrow_safe( basicblock *current_block, Py_ssize_t depth_of_value_on_entry, int original_local_idx, cfg_builder *g) { if (depth_of_value_on_entry == -1) { return true; } if (current_block->b_dfs_visited_gen == cfg_dfs_generation_counter) { return false; } current_block->b_dfs_visited_gen = cfg_dfs_generation_counter; Py_ssize_t current_target_depth = depth_of_value_on_entry; for (int i = 0; i < current_block->b_iused; i++) { cfg_instr *instr = ¤t_block->b_instr[i]; int opcode = instr->i_opcode; int oparg = instr->i_oparg; // 1. Check if the original local is killed before the value is consumed. if ((opcode == STORE_FAST || opcode == DELETE_FAST || opcode == LOAD_FAST_AND_CLEAR) && oparg == original_local_idx) { return false; } // 2. Simulate stack effect and check for consumption or unsafe store. stack_effects effects_noj; if (get_stack_effects(opcode, oparg, 0, &effects_noj) < 0) { return false; // Error in stack effect calculation } int num_popped = _PyOpcode_num_popped(opcode, oparg); int num_pushed = _PyOpcode_num_pushed(opcode, oparg); if (current_target_depth < num_popped) { if (opcode == STORE_FAST && current_target_depth == 0) { // Unsafe: borrowed value was at TOS and is being stored into a local. return false; } return true; // Safely consumed by this instruction. } // Value not consumed, update its depth. current_target_depth = current_target_depth - num_popped + num_pushed; // 3. Handle branches (jumps are always last in a basic block). if (HAS_TARGET(opcode)) { if (i != current_block->b_iused - 1) { continue; // Skip jumps that are not the last instruction in the block. } bool safe_on_all_branches = true; // Calculate item's depth just before this jump instruction's own stack effect. Py_ssize_t depth_before_jump_op_effect = depth_of_value_on_entry; for(int k=0; k < i; ++k) { // Iterate instructions before the current jump cfg_instr *prev_instr_in_block = ¤t_block->b_instr[k]; // Kill check for intermediate instructions if ((prev_instr_in_block->i_opcode == STORE_FAST || prev_instr_in_block->i_opcode == DELETE_FAST || prev_instr_in_block->i_opcode == LOAD_FAST_AND_CLEAR) && prev_instr_in_block->i_oparg == original_local_idx) { return false; } stack_effects prev_effects; if (get_stack_effects(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg, 0, &prev_effects) < 0) { return false; } int prev_popped_val = _PyOpcode_num_popped(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); if (depth_before_jump_op_effect < prev_popped_val) { // Consumed before jump if (prev_instr_in_block->i_opcode == STORE_FAST && depth_before_jump_op_effect == 0) { return false; // Stored into local } return true; // Safely consumed before the jump } depth_before_jump_op_effect = depth_before_jump_op_effect - prev_popped_val + _PyOpcode_num_pushed(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); } // Analyze jump target path stack_effects effects_jump; if (get_stack_effects(opcode, oparg, 1, &effects_jump) < 0) return false; int jump_popped_val = _PyOpcode_num_popped(opcode, oparg); Py_ssize_t target_entry_depth = depth_before_jump_op_effect; if (target_entry_depth < jump_popped_val) { // Consumed by the jump op itself if (opcode == STORE_FAST && target_entry_depth == 0) { safe_on_all_branches = false; } // else: safely consumed by jump operation. } else { // Not consumed by jump op, recurse on target. target_entry_depth = target_entry_depth - jump_popped_val + _PyOpcode_num_pushed(opcode, oparg); if (!is_borrow_safe(instr->i_target, target_entry_depth, original_local_idx, g)) { safe_on_all_branches = false; } } // Analyze fallthrough path for conditional jumps, if the jump path was safe. if (safe_on_all_branches && IS_CONDITIONAL_JUMP_OPCODE(opcode) && current_block->b_next) { // 'current_target_depth' already reflects the stack state if the jump is *not* taken. if (!is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g)) { safe_on_all_branches = false; } } return safe_on_all_branches; } } // When the instructions finish and the value isn't consumed, check fallthrough. if (current_block->b_next && BB_HAS_FALLTHROUGH(current_block)) { return is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g); } // No further path (or no fallthrough) and value is still on stack (current_target_depth is valid). // This means it's unconsumed at the end of a CFG path. return false; } /* * Initiates a Control Flow Graph (CFG) wide safety check for a borrowed reference. * * This function is called when a LOAD_FAST instruction is a candidate for * LOAD_FAST_BORROW, but its produced value ('REF_UNCONSUMED' flag is set) * might live beyond the current basic block ('producer_block'). * * It determines the immediate successor(s) of the 'producer_block' and the * stack depth at which the borrowed value (identified by 'original_local_idx' * and initially at 'depth_of_value_at_producer_end' from TOS) would enter * these successors. * * It then calls 'is_borrow_safe()' for each successor path. The borrow is * considered globally safe only if 'is_borrow_safe()' returns true for ALL * possible successor paths. * * A new DFS traversal is started by incrementing 'cfg_dfs_generation_counter' * to ensure that 'is_borrow_safe()' uses a fresh set of visited markers * ('b_dfs_visited_gen') for its analysis. * * Args: * producer_block: The basic block containing the LOAD_FAST candidate. * depth_of_value_at_producer_end: The depth of the candidate borrowed value * from TOS at the end of 'producer_block'. * (0 = TOS, -1 if already consumed - though * this function expects a live value). * original_local_idx: The index of the local variable backing the borrow. * g: The CFG builder. * * Returns: * true if the borrow is safe across all subsequent CFG paths, false otherwise. */ static bool check_borrow_safety_globally( basicblock *producer_block, Py_ssize_t depth_of_value_at_producer_end, int original_local_idx, cfg_builder *g) { cfg_dfs_generation_counter++; bool overall_safety = true; cfg_instr *last_instr = basicblock_last_instr(producer_block); // If depth is -1, implies value was already consumed or is invalid for tracking. if (depth_of_value_at_producer_end == -1) { return false; } if (last_instr && HAS_TARGET(last_instr->i_opcode)) { stack_effects effects_jump; if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 1, &effects_jump) < 0) return false; int jump_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); Py_ssize_t entry_depth_for_target = depth_of_value_at_producer_end; if (entry_depth_for_target < jump_popped) { // Consumed by the jump itself. if (last_instr->i_opcode == STORE_FAST && entry_depth_for_target == 0) { overall_safety = false; // Unsafe store of borrowed value. } // else: safely consumed. } else { entry_depth_for_target = entry_depth_for_target - jump_popped + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); if (!is_borrow_safe(last_instr->i_target, entry_depth_for_target, original_local_idx, g)) { overall_safety = false; } } // Analyze fallthrough path for conditional jumps, if the jump path was safe. if (overall_safety && IS_CONDITIONAL_JUMP_OPCODE(last_instr->i_opcode) && producer_block->b_next) { stack_effects effects_fall; // Stack effect if jump is NOT taken. if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 0, &effects_fall) < 0) return false; int fall_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); Py_ssize_t entry_depth_for_fallthrough = depth_of_value_at_producer_end; if (entry_depth_for_fallthrough < fall_popped) { // Consumed by not taking jump. if (last_instr->i_opcode == STORE_FAST && entry_depth_for_fallthrough == 0) { overall_safety = false; // Unsafe store. } // else: safely consumed. } else { // Not consumed by not taking jump, recurse on fallthrough. entry_depth_for_fallthrough = entry_depth_for_fallthrough - fall_popped + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); if (!is_borrow_safe(producer_block->b_next, entry_depth_for_fallthrough, original_local_idx, g)) { overall_safety = false; } } } } else if (producer_block->b_next && BB_HAS_FALLTHROUGH(producer_block)) { // Standard fallthrough, no jump. if (!is_borrow_safe(producer_block->b_next, depth_of_value_at_producer_end, original_local_idx, g)) { overall_safety = false; } } else { // No successors (e.g., block ends in RETURN/RAISE) and value still on stack. overall_safety = false; // Unconsumed at end of a CFG path. } return overall_safety; } /* * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that * load borrowed references onto the operand stack. Expand Down Expand Up @@ -2747,6 +3012,7 @@ optimize_load_fast(cfg_builder *g) basicblock *entryblock = g->g_entryblock; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); b->b_dfs_visited_gen = 0; } size_t instr_flags_size = max_instrs * sizeof(uint8_t); uint8_t *instr_flags = PyMem_Malloc(instr_flags_size); Expand Down Expand Up @@ -2976,17 +3242,82 @@ optimize_load_fast(cfg_builder *g) // Optimize instructions for (int i = 0; i < block->b_iused; i++) { if (!(instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL))) { cfg_instr *instr = &block->b_instr[i]; switch (instr->i_opcode) { case LOAD_FAST: cfg_instr *instr = &block->b_instr[i]; bool is_load_fast_type = (instr->i_opcode == LOAD_FAST || instr->i_opcode == LOAD_FAST_LOAD_FAST); if (is_load_fast_type) { bool killed_or_stored_locally = (instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL)); if (killed_or_stored_locally) { continue; // Definitely cannot borrow due to local block issues } bool unconsumed_in_block = (instr_flags[i] & REF_UNCONSUMED); bool can_borrow = false; if (!unconsumed_in_block) { can_borrow = true; // Safe by local analysis, consumed in current block } else { if (instr->i_opcode == LOAD_FAST) { int local_idx = instr->i_oparg; Py_ssize_t depth_from_tos_at_block_end = -1; // Find the specific item on the `refs` stack (at end of current block simulation) for (Py_ssize_t k = 0; k < refs.size; k++) { ref r = ref_stack_at(&refs, k); if (r.instr == i && r.local == local_idx) { // Match instruction and local depth_from_tos_at_block_end = refs.size - 1 - k; break; } } if (depth_from_tos_at_block_end != -1) { can_borrow = check_borrow_safety_globally(block, depth_from_tos_at_block_end, local_idx, g); } else { // If REF_UNCONSUMED is set but we couldn't find its depth, assume unsafe. // This can happen if refs.size is 0, yet flag is set. can_borrow = false; } } else { // LOAD_FAST_LOAD_FAST can_borrow = true; // Assume true, prove false if any part is unsafe int local_idx1 = instr->i_oparg >> 4; int local_idx2 = instr->i_oparg & 15; Py_ssize_t depth1 = -1, depth2 = -1; // Find depths on `refs` stack for both products of this LFLF instruction `i` for (Py_ssize_t k = 0; k < refs.size; k++) { ref r = ref_stack_at(&refs, k); if (r.instr == i) { Py_ssize_t current_depth = refs.size - 1 - k; if (r.local == local_idx1 && depth1 == -1) depth1 = current_depth; else if (r.local == local_idx2 && depth2 == -1) depth2 = current_depth; } } bool found_any_on_stack = (depth1 != -1 || depth2 != -1); if (depth1 != -1) { if (!check_borrow_safety_globally(block, depth1, local_idx1, g)) { can_borrow = false; } } if (can_borrow && depth2 != -1) { if (!check_borrow_safety_globally(block, depth2, local_idx2, g)) { can_borrow = false; } } if (unconsumed_in_block && !found_any_on_stack) { can_borrow = false; } } } if (can_borrow) { if (instr->i_opcode == LOAD_FAST) { instr->i_opcode = LOAD_FAST_BORROW; break; case LOAD_FAST_LOAD_FAST: } else if (instr->i_opcode == LOAD_FAST_LOAD_FAST) { instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; break; default: break; } } } } Expand Down