Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
X86CallFrameOptimization.cpp
Go to the documentation of this file.
1//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pass that optimizes call sequences on x86.
10// Currently, it converts movs of function parameters onto the stack into
11// pushes. This is beneficial for two main reasons:
12// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.
13// 2) It is possible to push memory arguments directly. So, if the
14// transformation is performed pre-reg-alloc, it can help relieve
15// register pressure.
16//
17//===----------------------------------------------------------------------===//
18
19#include "MCTargetDesc/X86BaseInfo.h"
20#include "X86.h"
21#include "X86FrameLowering.h"
22#include "X86InstrInfo.h"
23#include "X86MachineFunctionInfo.h"
24#include "X86RegisterInfo.h"
25#include "X86Subtarget.h"
26#include "llvm/ADT/DenseSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineFunctionPass.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineOperand.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/TargetInstrInfo.h"
38#include "llvm/CodeGen/TargetRegisterInfo.h"
39#include "llvm/IR/DebugLoc.h"
40#include "llvm/IR/Function.h"
41#include "llvm/MC/MCDwarf.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/MathExtras.h"
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49
using namespace llvm;

#define DEBUG_TYPE "x86-cf-opt"

// Command-line escape hatch: -no-x86-call-frame-opt disables this pass
// entirely (checked first thing in isLegal()).
static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);
58
namespace {

/// Machine-function pass that rewrites the mov-to-stack sequences used to
/// pass call arguments into push instructions. Pushes encode smaller than
/// stack-pointer-relative movs and can take memory operands directly, which
/// (pre-reg-alloc) also relieves register pressure.
class X86CallFrameOptimization : public MachineFunctionPass {
public:
  X86CallFrameOptimization() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;

private:
  // Information we know about a particular call site
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

    // Iterator referring to the frame setup instruction
    MachineBasicBlock::iterator FrameSetup;

    // Actual call instruction
    MachineInstr *Call = nullptr;

    // A copy of the stack pointer
    MachineInstr *SPCopy = nullptr;

    // The total displacement of all passed parameters
    int64_t ExpectedDist = 0;

    // The sequence of storing instructions used to pass the parameters,
    // indexed by stack slot (displacement >> Log2SlotSize).
    SmallVector<MachineInstr *, 4> ArgStoreVector;

    // True if this call site has no stack parameters
    bool NoStackParams = false;

    // True if this call site can use push instructions
    bool UsePush = false;
  };

  typedef SmallVector<CallContext, 8> ContextVector;

  // Whether the transformation may be applied anywhere in this function.
  bool isLegal(MachineFunction &MF);

  // Code-size heuristic over all collected call sites.
  bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);

  // Analyze one call sequence starting at frame-setup instruction I.
  void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I, CallContext &Context);

  // Perform the mov->push rewrite for one analyzed call site.
  void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

  // If the register being pushed was defined by a foldable load, return
  // that load; otherwise nullptr.
  MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
                                   Register Reg);

  // Convert: instruction is an argument store we can turn into a push.
  // Skip: harmless instruction we can tolerate inside the sequence.
  // Exit: instruction that terminates the scan.
  enum InstClassification { Convert, Skip, Exit };

  InstClassification classifyInstruction(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         const X86RegisterInfo &RegInfo,
                                         DenseSet<unsigned int> &UsedRegs);

  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

  // Cached per-function state, set up in runOnMachineFunction().
  const X86InstrInfo *TII = nullptr;
  const X86FrameLowering *TFL = nullptr;
  const X86Subtarget *STI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  unsigned SlotSize = 0;
  unsigned Log2SlotSize = 0;
};

} // end anonymous namespace
char X86CallFrameOptimization::ID = 0;
INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)
131
// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
// we don't even want to try.
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  // Honor the command-line override that disables the pass.
  if (NoX86CFOpt.getValue())
    return false;

  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
  // in the compact unwind encoding that Darwin uses. So, bail if there
  // is a danger of that being generated.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // It is not valid to change the stack pointer outside the prolog/epilog
  // on 64-bit Windows.
  if (STI->isTargetWin64())
    return false;

  // You would expect straight-line code between call-frame setup and
  // call-frame destroy. You would be wrong. There are circumstances (e.g.
  // CMOV_GR8 expansion of a select that feeds a function call!) where we can
  // end up with the setup and the destroy in different basic blocks.
  // This is bad, and breaks SP adjustment.
  // So, check that all of the frames in the function are closed inside
  // the same block, and, for good measure, that there are no nested frames.
  //
  // If any call allocates more argument stack memory than the stack
  // probe size, don't do this optimization. Otherwise, this pass
  // would need to synthesize additional stack probe calls to allocate
  // memory for arguments.
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
  unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        // A frame this large would need interleaved stack probes.
        if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
          return false;
        // Nested call frames are not supported.
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        // Destroy without a matching setup in this block.
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    // A frame still open at the end of the block means the setup and
    // destroy live in different blocks; bail.
    if (InsideFrameSequence)
      return false;
  }

  return true;
}
190
191// Check whether this transformation is profitable for a particular
192// function - in terms of code size.
193bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
194 ContextVector &CallSeqVector) {
195// This transformation is always a win when we do not expect to have
196// a reserved call frame. Under other circumstances, it may be either
197// a win or a loss, and requires a heuristic.
198bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
199if (CannotReserveFrame)
200returntrue;
201
202Align StackAlign = TFL->getStackAlign();
203
204 int64_t Advantage = 0;
205for (constauto &CC : CallSeqVector) {
206// Call sites where no parameters are passed on the stack
207// do not affect the cost, since there needs to be no
208// stack adjustment.
209if (CC.NoStackParams)
210continue;
211
212if (!CC.UsePush) {
213// If we don't use pushes for a particular call site,
214// we pay for not having a reserved call frame with an
215// additional sub/add esp pair. The cost is ~3 bytes per instruction,
216// depending on the size of the constant.
217// TODO: Callee-pop functions should have a smaller penalty, because
218// an add is needed even with a reserved call frame.
219 Advantage -= 6;
220 }else {
221// We can use pushes. First, account for the fixed costs.
222// We'll need a add after the call.
223 Advantage -= 3;
224// If we have to realign the stack, we'll also need a sub before
225if (!isAligned(StackAlign,CC.ExpectedDist))
226 Advantage -= 3;
227// Now, for each push, we save ~3 bytes. For small constants, we actually,
228// save more (up to 5 bytes), but 3 should be a good approximation.
229 Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
230 }
231 }
232
233return Advantage >= 0;
234}
235
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget-derived helpers used throughout the pass.
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;

  // First pass: analyze every call sequence in the function.
  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  // The profitability decision is made once for the whole function.
  if (!isProfitable(MF, CallSeqVector))
    return false;

  // Second pass: rewrite every call site that qualified for pushes.
  for (const auto &CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }

  return Changed;
}
277
// Classify one instruction found between the call-frame setup and the call:
// Convert (an argument store we can turn into a push), Skip (tolerated), or
// Exit (terminates the scan).
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack or special
  // cases of constant-stores to stack
  switch (MI->getOpcode()) {
  case X86::AND16mi:
  case X86::AND32mi:
  case X86::AND64mi32: {
    // AND mem, 0 is an idiom for storing 0; any other immediate bails.
    const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi:
  case X86::OR32mi:
  case X86::OR64mi32: {
    // OR mem, -1 is an idiom for storing -1; any other immediate bails.
    const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi:
  case X86::MOV32mr:
  case X86::MOV64mi32:
  case X86::MOV64mr:
    return Convert;
  }

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.

  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.

  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    // Any touch of the stack pointer terminates the scan.
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      // Clobbering a register consumed by an earlier to-be-push store
      // would change the pushed value (see the comment above).
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}
352
// Analyze the call sequence starting at frame-setup instruction I and fill
// in Context. On success, Context.UsePush is set; on any early return the
// sequence is simply left unconverted (the pass stays conservative).
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  Register StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register. If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call chain.
  // The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    // The default ArgStoreVector has 4 slots; grow it for larger frames.
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  // Physical registers read by the stores collected so far; a later def of
  // any of these makes the reordering into pushes unsound.
  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    // Convert the byte displacement into a slot index.
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    // Record the physical registers this store reads (see UsedRegs above).
    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isPhysical())
        UsedRegs.insert(Reg);
    }
  }

  // The loop above increments past the Exit instruction; step back to it.
  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
492
// Rewrite one analyzed call site: replace each argument store with a push
// (emitted in reverse slot order so the pushes produce the same layout),
// update the frame-setup adjustment, and clean up the dead SP copy.
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  const DebugLoc &DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the store to
  // stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    // The value operand follows the address operands in the store.
    const MachineOperand &PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi:
    case X86::AND32mi:
    case X86::AND64mi32:
    case X86::OR16mi:
    case X86::OR32mi:
    case X86::OR64mi32:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      // Immediate store -> push immediate.
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      Push->cloneMemRefs(MF, *Store);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      Register Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->slowTwoMemOps();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        // Fold the load into a push-from-memory.
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        // Copy the load's address operands (its trailing AddrNumOperands).
        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));
        Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
        DefMov->eraseFromParent();
      } else {
        // Plain register push.
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
        Push->cloneMemRefs(MF, *Store);
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}
591
592MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
593MachineBasicBlock::iterator FrameSetup,Register Reg) {
594// Do an extremely restricted form of load folding.
595// ISel will often create patterns like:
596// movl 4(%edi), %eax
597// movl 8(%edi), %ecx
598// movl 12(%edi), %edx
599// movl %edx, 8(%esp)
600// movl %ecx, 4(%esp)
601// movl %eax, (%esp)
602// call
603// Get rid of those with prejudice.
604if (!Reg.isVirtual())
605returnnullptr;
606
607// Make sure this is the only use of Reg.
608if (!MRI->hasOneNonDBGUse(Reg))
609returnnullptr;
610
611MachineInstr &DefMI = *MRI->getVRegDef(Reg);
612
613// Make sure the def is a MOV from memory.
614// If the def is in another block, give up.
615if ((DefMI.getOpcode() != X86::MOV32rm &&
616DefMI.getOpcode() != X86::MOV64rm) ||
617DefMI.getParent() != FrameSetup->getParent())
618returnnullptr;
619
620// Make sure we don't have any instructions between DefMI and the
621// push that make folding the load illegal.
622for (MachineBasicBlock::iteratorI =DefMI;I != FrameSetup; ++I)
623if (I->isLoadFoldBarrier())
624returnnullptr;
625
626return &DefMI;
627}
628
/// Return a pass that optimizes the code-size of x86 call sequences.
FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}
MRI
unsigned const MachineRegisterInfo * MRI
Definition:AArch64AdvSIMDScalarPass.cpp:105
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition:AArch64ExpandPseudoInsts.cpp:113
MBB
MachineBasicBlock & MBB
Definition:ARMSLSHardening.cpp:71
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
CommandLine.h
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
DebugLoc.h
DenseSet.h
This file defines the DenseSet and SmallDenseSet classes.
TII
const HexagonInstrInfo * TII
Definition:HexagonCopyToCombine.cpp:125
MI
IRTranslator LLVM IR MI
Definition:IRTranslator.cpp:112
Function.h
MCDwarf.h
I
#define I(x, y, z)
Definition:MD5.cpp:58
MachineBasicBlock.h
MachineFrameInfo.h
MachineFunctionPass.h
MachineFunction.h
MachineInstrBuilder.h
MachineInstr.h
MachineOperand.h
MachineRegisterInfo.h
MathExtras.h
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:38
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SmallVector.h
This file defines the SmallVector class.
isProfitable
static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)
Definition:StableFunctionMap.cpp:162
StringRef.h
TargetInstrInfo.h
TargetRegisterInfo.h
getOpcode
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition:VPlanSLP.cpp:191
X86BaseInfo.h
NoX86CFOpt
static cl::opt< bool > NoX86CFOpt("no-x86-call-frame-opt", cl::desc("Avoid optimizing x86 call frames for size"), cl::init(false), cl::Hidden)
DEBUG_TYPE
#define DEBUG_TYPE
Definition:X86CallFrameOptimization.cpp:52
X86FrameLowering.h
X86InstrInfo.h
X86MachineFunctionInfo.h
X86RegisterInfo.h
X86Subtarget.h
X86.h
bool
llvm::DebugLoc
A debug info location.
Definition:DebugLoc.h:33
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition:DenseSet.h:278
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition:Pass.h:310
llvm::MCCFIInstruction::createAdjustCfaOffset
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition:MCDwarf.h:598
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition:MCInstrDesc.h:237
llvm::MachineBasicBlock
Definition:MachineBasicBlock.h:125
llvm::MachineBasicBlock::end
iterator end()
Definition:MachineBasicBlock.h:357
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition:MachineBasicBlock.cpp:1443
llvm::MachineFrameInfo::hasVarSizedObjects
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Definition:MachineFrameInfo.h:357
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition:MachineFunctionPass.h:30
llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
llvm::MachineFunction
Definition:MachineFunction.h:267
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition:MachineFunction.h:733
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition:MachineFunction.h:749
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition:MachineFunction.h:743
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition:MachineFunction.h:704
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition:MachineFunction.h:831
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition:MachineInstrBuilder.h:133
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition:MachineInstrBuilder.h:226
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition:MachineInstrBuilder.h:99
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition:MachineInstrBuilder.h:91
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MachineInstr
Representation of each machine instruction.
Definition:MachineInstr.h:71
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition:MachineInstr.h:574
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition:MachineInstr.cpp:767
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition:MachineInstr.h:587
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition:MachineOperand.h:48
llvm::MachineOperand::getImm
int64_t getImm() const
Definition:MachineOperand.h:556
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition:MachineOperand.h:369
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition:MachineRegisterInfo.h:51
llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition:Pass.cpp:81
llvm::Register
Wrapper class representing virtual and physical registers.
Definition:Register.h:19
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::X86FrameLowering
Definition:X86FrameLowering.h:28
llvm::X86InstrInfo
Definition:X86InstrInfo.h:177
llvm::X86MachineFunctionInfo
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
Definition:X86MachineFunctionInfo.h:58
llvm::X86MachineFunctionInfo::setHasPushSequences
void setHasPushSequences(bool HasPush)
Definition:X86MachineFunctionInfo.h:201
llvm::X86RegisterInfo
Definition:X86RegisterInfo.h:24
llvm::X86Subtarget
Definition:X86Subtarget.h:53
llvm::cl::opt_storage::getValue
DataType & getValue()
Definition:CommandLine.h:1352
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition:DenseSet.h:213
unsigned
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
llvm::COFF::Exit
@ Exit
Definition:COFF.h:845
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition:CallingConv.h:24
llvm::MCID::Call
@ Call
Definition:MCInstrDesc.h:156
llvm::SPII::Store
@ Store
Definition:SparcInstrInfo.h:33
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition:X86DisassemblerDecoder.h:621
llvm::X86::AddrBaseReg
@ AddrBaseReg
Definition:X86BaseInfo.h:29
llvm::X86::AddrScaleAmt
@ AddrScaleAmt
Definition:X86BaseInfo.h:30
llvm::X86::AddrSegmentReg
@ AddrSegmentReg
Definition:X86BaseInfo.h:34
llvm::X86::AddrDisp
@ AddrDisp
Definition:X86BaseInfo.h:32
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition:X86BaseInfo.h:31
llvm::X86::AddrNumOperands
@ AddrNumOperands
Definition:X86BaseInfo.h:36
llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip
@ Skip
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm::codeview::EncodedFramePtrReg::StackPtr
@ StackPtr
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition:MachineInstrBuilder.h:373
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition:Alignment.h:145
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition:MathExtras.h:341
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition:MathExtras.h:292
llvm::createX86CallFrameOptimization
FunctionPass * createX86CallFrameOptimization()
Return a pass that optimizes the code-size of x86 call sequences.
Definition:X86CallFrameOptimization.cpp:629
RegInfo
Definition:AMDGPUAsmParser.cpp:2770
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39
llvm::cl::desc
Definition:CommandLine.h:409

Generated on Fri Jul 18 2025 14:54:33 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp