Movatterモバイル変換

Go to the documentation of this file.

1//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// This file defines a pass that optimizes call sequences on x86.

10// Currently, it converts movs of function parameters onto the stack into

11// pushes. This is beneficial for two main reasons:

12// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.

13// 2) It is possible to push memory arguments directly. So, if the

14// the transformation is performed pre-reg-alloc, it can help relieve

15// register pressure.

16//

17//===----------------------------------------------------------------------===//

19#include "MCTargetDesc/X86BaseInfo.h"

20#include "X86.h"

21#include "X86FrameLowering.h"

22#include "X86InstrInfo.h"

23#include "X86MachineFunctionInfo.h"

24#include "X86RegisterInfo.h"

25#include "X86Subtarget.h"

26#include "llvm/ADT/DenseSet.h"

27#include "llvm/ADT/SmallVector.h"

28#include "llvm/ADT/StringRef.h"

29#include "llvm/CodeGen/MachineBasicBlock.h"

30#include "llvm/CodeGen/MachineFrameInfo.h"

31#include "llvm/CodeGen/MachineFunction.h"

32#include "llvm/CodeGen/MachineFunctionPass.h"

33#include "llvm/CodeGen/MachineInstr.h"

34#include "llvm/CodeGen/MachineInstrBuilder.h"

35#include "llvm/CodeGen/MachineOperand.h"

36#include "llvm/CodeGen/MachineRegisterInfo.h"

37#include "llvm/CodeGen/TargetInstrInfo.h"

38#include "llvm/CodeGen/TargetRegisterInfo.h"

39#include "llvm/IR/DebugLoc.h"

40#include "llvm/IR/Function.h"

41#include "llvm/MC/MCDwarf.h"

42#include "llvm/Support/CommandLine.h"

43#include "llvm/Support/ErrorHandling.h"

44#include "llvm/Support/MathExtras.h"

45#include <cassert>

46#include <cstddef>

47#include <cstdint>

48#include <iterator>

50using namespacellvm;

52#define DEBUG_TYPE "x86-cf-opt"

54staticcl::opt<bool>

55NoX86CFOpt("no-x86-call-frame-opt",

56cl::desc("Avoid optimizing x86 call frames for size"),

57cl::init(false),cl::Hidden);

59namespace{

61classX86CallFrameOptimization :publicMachineFunctionPass {

62public:

63 X86CallFrameOptimization() :MachineFunctionPass(ID) { }

65boolrunOnMachineFunction(MachineFunction &MF)override;

67staticcharID;

69private:

70// Information we know about a particular call site

71structCallContext {

72 CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

74// Iterator referring to the frame setup instruction

75MachineBasicBlock::iterator FrameSetup;

77// Actual call instruction

78MachineInstr *Call =nullptr;

80// A copy of the stack pointer

81MachineInstr *SPCopy =nullptr;

83// The total displacement of all passed parameters

84 int64_t ExpectedDist = 0;

86// The sequence of storing instructions used to pass the parameters

87SmallVector<MachineInstr *, 4> ArgStoreVector;

89// True if this call site has no stack parameters

90bool NoStackParams =false;

92// True if this call site can use push instructions

93bool UsePush =false;

94 };

96typedefSmallVector<CallContext, 8> ContextVector;

98bool isLegal(MachineFunction &MF);

100boolisProfitable(MachineFunction &MF, ContextVector &CallSeqMap);

101

102void collectCallInfo(MachineFunction &MF,MachineBasicBlock &MBB,

103MachineBasicBlock::iterator I, CallContext &Context);

104

105void adjustCallSequence(MachineFunction &MF,const CallContext &Context);

106

107MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,

108Register Reg);

109

110enum InstClassification { Convert,Skip,Exit };

111

112 InstClassification classifyInstruction(MachineBasicBlock &MBB,

113MachineBasicBlock::iterator MI,

114constX86RegisterInfo &RegInfo,

115DenseSet<unsigned int> &UsedRegs);

116

117StringRef getPassName() const override{return"X86 Optimize Call Frame"; }

118

119constX86InstrInfo *TII =nullptr;

120constX86FrameLowering *TFL =nullptr;

121constX86Subtarget *STI =nullptr;

122MachineRegisterInfo *MRI =nullptr;

123unsigned SlotSize = 0;

124unsigned Log2SlotSize = 0;

125};

126

127}// end anonymous namespace

128char X86CallFrameOptimization::ID = 0;

129INITIALIZE_PASS(X86CallFrameOptimization,DEBUG_TYPE,

130"X86 Call Frame Optimization",false,false)

131

132// This checks whether the transformation is legal.

133// Also returns false in cases where it's potentially legal, but

134// we don't even want to try.

135bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {

136if (NoX86CFOpt.getValue())

137returnfalse;

138

139// We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset

140// in the compact unwind encoding that Darwin uses. So, bail if there

141// is a danger of that being generated.

142if (STI->isTargetDarwin() &&

143 (!MF.getLandingPads().empty() ||

144 (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))

145returnfalse;

146

147// It is not valid to change the stack pointer outside the prolog/epilog

148// on 64-bit Windows.

149if (STI->isTargetWin64())

150returnfalse;

151

152// You would expect straight-line code between call-frame setup and

153// call-frame destroy. You would be wrong. There are circumstances (e.g.

154// CMOV_GR8 expansion of a select that feeds a function call!) where we can

155// end up with the setup and the destroy in different basic blocks.

156// This is bad, and breaks SP adjustment.

157// So, check that all of the frames in the function are closed inside

158// the same block, and, for good measure, that there are no nested frames.

159//

160// If any call allocates more argument stack memory than the stack

161// probe size, don't do this optimization. Otherwise, this pass

162// would need to synthesize additional stack probe calls to allocate

163// memory for arguments.

164unsigned FrameSetupOpcode =TII->getCallFrameSetupOpcode();

165unsigned FrameDestroyOpcode =TII->getCallFrameDestroyOpcode();

166bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);

167unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);

168for (MachineBasicBlock &BB : MF) {

169bool InsideFrameSequence =false;

170for (MachineInstr &MI : BB) {

171if (MI.getOpcode() == FrameSetupOpcode) {

172if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)

173returnfalse;

174if (InsideFrameSequence)

175returnfalse;

176 InsideFrameSequence =true;

177 }elseif (MI.getOpcode() == FrameDestroyOpcode) {

178if (!InsideFrameSequence)

179returnfalse;

180 InsideFrameSequence =false;

181 }

182 }

183

184if (InsideFrameSequence)

185returnfalse;

186 }

187

188returntrue;

189}

190

191// Check whether this transformation is profitable for a particular

192// function - in terms of code size.

193bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,

194 ContextVector &CallSeqVector) {

195// This transformation is always a win when we do not expect to have

196// a reserved call frame. Under other circumstances, it may be either

197// a win or a loss, and requires a heuristic.

198bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();

199if (CannotReserveFrame)

200returntrue;

201

202Align StackAlign = TFL->getStackAlign();

203

204 int64_t Advantage = 0;

205for (constauto &CC : CallSeqVector) {

206// Call sites where no parameters are passed on the stack

207// do not affect the cost, since there needs to be no

208// stack adjustment.

209if (CC.NoStackParams)

210continue;

211

212if (!CC.UsePush) {

213// If we don't use pushes for a particular call site,

214// we pay for not having a reserved call frame with an

215// additional sub/add esp pair. The cost is ~3 bytes per instruction,

216// depending on the size of the constant.

217// TODO: Callee-pop functions should have a smaller penalty, because

218// an add is needed even with a reserved call frame.

219 Advantage -= 6;

220 }else {

221// We can use pushes. First, account for the fixed costs.

222// We'll need a add after the call.

223 Advantage -= 3;

224// If we have to realign the stack, we'll also need a sub before

225if (!isAligned(StackAlign,CC.ExpectedDist))

226 Advantage -= 3;

227// Now, for each push, we save ~3 bytes. For small constants, we actually,

228// save more (up to 5 bytes), but 3 should be a good approximation.

229 Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;

230 }

231 }

232

233return Advantage >= 0;

234}

235

236bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {

237 STI = &MF.getSubtarget<X86Subtarget>();

238TII = STI->getInstrInfo();

239 TFL = STI->getFrameLowering();

240MRI = &MF.getRegInfo();

241

242constX86RegisterInfo &RegInfo =

243 *static_cast<constX86RegisterInfo *>(STI->getRegisterInfo());

244 SlotSize =RegInfo.getSlotSize();

245assert(isPowerOf2_32(SlotSize) &&"Expect power of 2 stack slot size");

246 Log2SlotSize =Log2_32(SlotSize);

247

248if (skipFunction(MF.getFunction()) || !isLegal(MF))

249returnfalse;

250

251unsigned FrameSetupOpcode =TII->getCallFrameSetupOpcode();

252

253bool Changed =false;

254

255 ContextVector CallSeqVector;

256

257for (auto &MBB : MF)

258for (auto &MI :MBB)

259if (MI.getOpcode() == FrameSetupOpcode) {

260 CallContext Context;

261 collectCallInfo(MF,MBB,MI, Context);

262 CallSeqVector.push_back(Context);

263 }

264

265if (!isProfitable(MF, CallSeqVector))

266returnfalse;

267

268for (constauto &CC : CallSeqVector) {

269if (CC.UsePush) {

270 adjustCallSequence(MF,CC);

271 Changed =true;

272 }

273 }

274

275return Changed;

276}

277

278X86CallFrameOptimization::InstClassification

279X86CallFrameOptimization::classifyInstruction(

280MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,

281constX86RegisterInfo &RegInfo,DenseSet<unsigned int> &UsedRegs) {

282if (MI ==MBB.end())

283returnExit;

284

285// The instructions we actually care about are movs onto the stack or special

286// cases of constant-stores to stack

287switch (MI->getOpcode()) {

288case X86::AND16mi:

289case X86::AND32mi:

290case X86::AND64mi32: {

291constMachineOperand &ImmOp =MI->getOperand(X86::AddrNumOperands);

292return ImmOp.getImm() == 0 ? Convert :Exit;

293 }

294case X86::OR16mi:

295case X86::OR32mi:

296case X86::OR64mi32: {

297constMachineOperand &ImmOp =MI->getOperand(X86::AddrNumOperands);

298return ImmOp.getImm() == -1 ? Convert :Exit;

299 }

300case X86::MOV32mi:

301case X86::MOV32mr:

302case X86::MOV64mi32:

303case X86::MOV64mr:

304return Convert;

305 }

306

307// Not all calling conventions have only stack MOVs between the stack

308// adjust and the call.

309

310// We want to tolerate other instructions, to cover more cases.

311// In particular:

312// a) PCrel calls, where we expect an additional COPY of the basereg.

313// b) Passing frame-index addresses.

314// c) Calling conventions that have inreg parameters. These generate

315// both copies and movs into registers.

316// To avoid creating lots of special cases, allow any instruction

317// that does not write into memory, does not def or use the stack

318// pointer, and does not def any register that was used by a preceding

319// push.

320// (Reading from memory is allowed, even if referenced through a

321// frame index, since these will get adjusted properly in PEI)

322

323// The reason for the last condition is that the pushes can't replace

324// the movs in place, because the order must be reversed.

325// So if we have a MOV32mr that uses EDX, then an instruction that defs

326// EDX, and then the call, after the transformation the push will use

327// the modified version of EDX, and not the original one.

328// Since we are still in SSA form at this point, we only need to

329// make sure we don't clobber any *physical* registers that were

330// used by an earlier mov that will become a push.

331

332if (MI->isCall() ||MI->mayStore())

333returnExit;

334

335for (constMachineOperand &MO :MI->operands()) {

336if (!MO.isReg())

337continue;

338Register Reg = MO.getReg();

339if (!Reg.isPhysical())

340continue;

341if (RegInfo.regsOverlap(Reg,RegInfo.getStackRegister()))

342returnExit;

343if (MO.isDef()) {

344for (unsignedint U : UsedRegs)

345if (RegInfo.regsOverlap(Reg, U))

346returnExit;

347 }

348 }

349

350returnSkip;

351}

352

353void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,

354MachineBasicBlock &MBB,

355MachineBasicBlock::iterator I,

356 CallContext &Context) {

357// Check that this particular call sequence is amenable to the

358// transformation.

359constX86RegisterInfo &RegInfo =

360 *static_cast<constX86RegisterInfo *>(STI->getRegisterInfo());

361

362// We expect to enter this at the beginning of a call sequence

363assert(I->getOpcode() ==TII->getCallFrameSetupOpcode());

364MachineBasicBlock::iterator FrameSetup =I++;

365 Context.FrameSetup = FrameSetup;

366

367// How much do we adjust the stack? This puts an upper bound on

368// the number of parameters actually passed on it.

369unsignedint MaxAdjust =TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

370

371// A zero adjustment means no stack parameters

372if (!MaxAdjust) {

373 Context.NoStackParams =true;

374return;

375 }

376

377// Skip over DEBUG_VALUE.

378// For globals in PIC mode, we can have some LEAs here. Skip them as well.

379// TODO: Extend this to something that covers more cases.

380while (I->getOpcode() == X86::LEA32r ||I->isDebugInstr())

381 ++I;

382

383Register StackPtr =RegInfo.getStackRegister();

384auto StackPtrCopyInst =MBB.end();

385// SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual

386// register. If it's there, use that virtual register as stack pointer

387// instead. Also, we need to locate this instruction so that we can later

388// safely ignore it while doing the conservative processing of the call chain.

389// The COPY can be located anywhere between the call-frame setup

390// instruction and its first use. We use the call instruction as a boundary

391// because it is usually cheaper to check if an instruction is a call than

392// checking if an instruction uses a register.

393for (auto J =I; !J->isCall(); ++J)

394if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&

395 J->getOperand(1).getReg() == StackPtr) {

396 StackPtrCopyInst = J;

397 Context.SPCopy = &*J++;

398StackPtr = Context.SPCopy->getOperand(0).getReg();

399break;

400 }

401

402// Scan the call setup sequence for the pattern we're looking for.

403// We only handle a simple case - a sequence of store instructions that

404// push a sequence of stack-slot-aligned values onto the stack, with

405// no gaps between them.

406if (MaxAdjust > 4)

407 Context.ArgStoreVector.resize(MaxAdjust,nullptr);

408

409DenseSet<unsigned int> UsedRegs;

410

411for (InstClassification Classification = Skip; Classification !=Exit; ++I) {

412// If this is the COPY of the stack pointer, it's ok to ignore.

413if (I == StackPtrCopyInst)

414continue;

415 Classification = classifyInstruction(MBB,I,RegInfo, UsedRegs);

416if (Classification != Convert)

417continue;

418// We know the instruction has a supported store opcode.

419// We only want movs of the form:

420// mov imm/reg, k(%StackPtr)

421// If we run into something else, bail.

422// Note that AddrBaseReg may, counter to its name, not be a register,

423// but rather a frame index.

424// TODO: Support the fi case. This should probably work now that we

425// have the infrastructure to track the stack pointer within a call

426// sequence.

427if (!I->getOperand(X86::AddrBaseReg).isReg() ||

428 (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||

429 !I->getOperand(X86::AddrScaleAmt).isImm() ||

430 (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||

431 (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||

432 (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||

433 !I->getOperand(X86::AddrDisp).isImm())

434return;

435

436 int64_t StackDisp =I->getOperand(X86::AddrDisp).getImm();

437assert(StackDisp >= 0 &&

438"Negative stack displacement when passing parameters");

439

440// We really don't want to consider the unaligned case.

441if (StackDisp & (SlotSize - 1))

442return;

443 StackDisp >>= Log2SlotSize;

444

445assert((size_t)StackDisp < Context.ArgStoreVector.size() &&

446"Function call has more parameters than the stack is adjusted for.");

447

448// If the same stack slot is being filled twice, something's fishy.

449if (Context.ArgStoreVector[StackDisp] !=nullptr)

450return;

451 Context.ArgStoreVector[StackDisp] = &*I;

452

453for (constMachineOperand &MO :I->uses()) {

454if (!MO.isReg())

455continue;

456Register Reg = MO.getReg();

457if (Reg.isPhysical())

458 UsedRegs.insert(Reg);

459 }

460 }

461

462 --I;

463

464// We now expect the end of the sequence. If we stopped early,

465// or reached the end of the block without finding a call, bail.

466if (I ==MBB.end() || !I->isCall())

467return;

468

469 Context.Call = &*I;

470if ((++I)->getOpcode() !=TII->getCallFrameDestroyOpcode())

471return;

472

473// Now, go through the vector, and see that we don't have any gaps,

474// but only a series of storing instructions.

475auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();

476for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)

477if (*MMI ==nullptr)

478break;

479

480// If the call had no parameters, do nothing

481if (MMI == Context.ArgStoreVector.begin())

482return;

483

484// We are either at the last parameter, or a gap.

485// Make sure it's not a gap

486for (; MMI != MME; ++MMI)

487if (*MMI !=nullptr)

488return;

489

490 Context.UsePush =true;

491}

492

493void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,

494const CallContext &Context) {

495// Ok, we can in fact do the transformation for this call.

496// Do not remove the FrameSetup instruction, but adjust the parameters.

497// PEI will end up finalizing the handling of this.

498MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;

499MachineBasicBlock &MBB = *(FrameSetup->getParent());

500TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

501

502constDebugLoc &DL = FrameSetup->getDebugLoc();

503bool Is64Bit = STI->is64Bit();

504// Now, iterate through the vector in reverse order, and replace the store to

505// stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to

506// replace uses.

507for (intIdx = (Context.ExpectedDist >> Log2SlotSize) - 1;Idx >= 0; --Idx) {

508MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];

509constMachineOperand &PushOp =Store->getOperand(X86::AddrNumOperands);

510MachineBasicBlock::iterator Push =nullptr;

511unsigned PushOpcode;

512switch (Store->getOpcode()) {

513default:

514llvm_unreachable("Unexpected Opcode!");

515case X86::AND16mi:

516case X86::AND32mi:

517case X86::AND64mi32:

518case X86::OR16mi:

519case X86::OR32mi:

520case X86::OR64mi32:

521case X86::MOV32mi:

522case X86::MOV64mi32:

523 PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;

524 Push =BuildMI(MBB, Context.Call,DL,TII->get(PushOpcode)).add(PushOp);

525 Push->cloneMemRefs(MF, *Store);

526break;

527case X86::MOV32mr:

528case X86::MOV64mr: {

529Register Reg = PushOp.getReg();

530

531// If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg

532// in preparation for the PUSH64. The upper 32 bits can be undef.

533if (Is64Bit &&Store->getOpcode() == X86::MOV32mr) {

534Register UndefReg =MRI->createVirtualRegister(&X86::GR64RegClass);

535Reg =MRI->createVirtualRegister(&X86::GR64RegClass);

536BuildMI(MBB, Context.Call,DL,TII->get(X86::IMPLICIT_DEF), UndefReg);

537BuildMI(MBB, Context.Call,DL,TII->get(X86::INSERT_SUBREG), Reg)

538 .addReg(UndefReg)

539 .add(PushOp)

540 .addImm(X86::sub_32bit);

541 }

542

543// If PUSHrmm is not slow on this target, try to fold the source of the

544// push into the instruction.

545bool SlowPUSHrmm = STI->slowTwoMemOps();

546

547// Check that this is legal to fold. Right now, we're extremely

548// conservative about that.

549MachineInstr *DefMov =nullptr;

550if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {

551 PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;

552 Push =BuildMI(MBB, Context.Call,DL,TII->get(PushOpcode));

553

554unsigned NumOps = DefMov->getDesc().getNumOperands();

555for (unsigned i = NumOps -X86::AddrNumOperands; i != NumOps; ++i)

556 Push->addOperand(DefMov->getOperand(i));

557 Push->cloneMergedMemRefs(MF, {DefMov, &*Store});

558 DefMov->eraseFromParent();

559 }else {

560 PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;

561 Push =BuildMI(MBB, Context.Call,DL,TII->get(PushOpcode))

562 .addReg(Reg)

563 .getInstr();

564 Push->cloneMemRefs(MF, *Store);

565 }

566break;

567 }

568 }

569

570// For debugging, when using SP-based CFA, we need to adjust the CFA

571// offset after each push.

572// TODO: This is needed only if we require precise CFA.

573if (!TFL->hasFP(MF))

574 TFL->BuildCFI(

575MBB, std::next(Push),DL,

576MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

577

578MBB.erase(Store);

579 }

580

581// The stack-pointer copy is no longer used in the call sequences.

582// There should not be any other users, but we can't commit to that, so:

583if (Context.SPCopy &&MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))

584 Context.SPCopy->eraseFromParent();

585

586// Once we've done this, we need to make sure PEI doesn't assume a reserved

587// frame.

588X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

589 FuncInfo->setHasPushSequences(true);

590}

591

592MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(

593MachineBasicBlock::iterator FrameSetup,Register Reg) {

594// Do an extremely restricted form of load folding.

595// ISel will often create patterns like:

596// movl 4(%edi), %eax

597// movl 8(%edi), %ecx

598// movl 12(%edi), %edx

599// movl %edx, 8(%esp)

600// movl %ecx, 4(%esp)

601// movl %eax, (%esp)

602// call

603// Get rid of those with prejudice.

604if (!Reg.isVirtual())

605returnnullptr;

606

607// Make sure this is the only use of Reg.

608if (!MRI->hasOneNonDBGUse(Reg))

609returnnullptr;

610

611MachineInstr &DefMI = *MRI->getVRegDef(Reg);

612

613// Make sure the def is a MOV from memory.

614// If the def is in another block, give up.

615if ((DefMI.getOpcode() != X86::MOV32rm &&

616DefMI.getOpcode() != X86::MOV64rm) ||

617DefMI.getParent() != FrameSetup->getParent())

618returnnullptr;

619

620// Make sure we don't have any instructions between DefMI and the

621// push that make folding the load illegal.

622for (MachineBasicBlock::iterator I =DefMI;I != FrameSetup; ++I)

623if (I->isLoadFoldBarrier())

624returnnullptr;

625

626return &DefMI;

627}

628

629FunctionPass *llvm::createX86CallFrameOptimization() {

630returnnew X86CallFrameOptimization();

631}

MRI

unsigned const MachineRegisterInfo * MRI

Definition:AArch64AdvSIMDScalarPass.cpp:105

DefMI

MachineInstrBuilder MachineInstrBuilder & DefMI

Definition:AArch64ExpandPseudoInsts.cpp:113

MBB

MachineBasicBlock & MBB

Definition:ARMSLSHardening.cpp:71

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Definition:ARMSLSHardening.cpp:73

CommandLine.h

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

DebugLoc.h

DenseSet.h

This file defines the DenseSet and SmallDenseSet classes.

TII

const HexagonInstrInfo * TII

Definition:HexagonCopyToCombine.cpp:125

IRTranslator LLVM IR MI

Definition:IRTranslator.cpp:112

Function.h

MCDwarf.h

#define I(x, y, z)

Definition:MD5.cpp:58

MachineBasicBlock.h

MachineFrameInfo.h

MachineFunctionPass.h

MachineFunction.h

MachineInstrBuilder.h

MachineInstr.h

MachineOperand.h

MachineRegisterInfo.h

MathExtras.h

INITIALIZE_PASS

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

Definition:PassSupport.h:38

auto CC

Definition:RISCVRedundantCopyElimination.cpp:79

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SmallVector.h

This file defines the SmallVector class.

isProfitable

static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)

Definition:StableFunctionMap.cpp:162

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

Definition:VPlanSLP.cpp:191

X86BaseInfo.h

NoX86CFOpt

static cl::opt< bool > NoX86CFOpt("no-x86-call-frame-opt", cl::desc("Avoid optimizing x86 call frames for size"), cl::init(false), cl::Hidden)

DEBUG_TYPE

#define DEBUG_TYPE

Definition:X86CallFrameOptimization.cpp:52

X86FrameLowering.h

X86InstrInfo.h

X86MachineFunctionInfo.h

A debug info location.

Definition:DebugLoc.h:33

llvm::DenseSet

Implements a dense probed hash-table based set.

Definition:DenseSet.h:278

llvm::FunctionPass

FunctionPass class - This class is used to implement most global optimizations.

Definition:Pass.h:310

llvm::MCCFIInstruction::createAdjustCfaOffset

static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})

.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...

Definition:MCDwarf.h:598

llvm::MCInstrDesc::getNumOperands

unsigned getNumOperands() const

Return the number of declared MachineOperands for this MachineInstruction.

Definition:MCInstrDesc.h:237

llvm::MachineBasicBlock

Definition:MachineBasicBlock.h:125

llvm::MachineBasicBlock::end

iterator end()

Definition:MachineBasicBlock.h:357

llvm::MachineBasicBlock::erase

instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

Definition:MachineBasicBlock.cpp:1443

llvm::MachineFrameInfo::hasVarSizedObjects

bool hasVarSizedObjects() const

This method may be called any time after instruction selection is complete to determine if the stack ...

Definition:MachineFrameInfo.h:357

llvm::MachineFunctionPass

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

Definition:MachineFunctionPass.h:30

llvm::MachineFunctionPass::runOnMachineFunction

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunction

Definition:MachineFunction.h:267

llvm::MachineFunction::getSubtarget

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Definition:MachineFunction.h:733

llvm::MachineFunction::getFrameInfo

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

Definition:MachineFunction.h:749

llvm::MachineFunction::getRegInfo

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Definition:MachineFunction.h:743

llvm::MachineFunction::getFunction

Function & getFunction()

Return the LLVM function that this machine code represents.

Definition:MachineFunction.h:704

llvm::MachineFunction::getInfo

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

Definition:MachineFunction.h:831

llvm::MachineInstrBuilder::addImm

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

Definition:MachineInstrBuilder.h:133

llvm::MachineInstrBuilder::add

const MachineInstrBuilder & add(const MachineOperand &MO) const

Definition:MachineInstrBuilder.h:226

llvm::MachineInstrBuilder::addReg

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Definition:MachineInstrBuilder.h:99

llvm::MachineInstrBuilder::getInstr

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

Definition:MachineInstrBuilder.h:91

llvm::MachineInstrBundleIterator< MachineInstr >

llvm::MachineInstr

Representation of each machine instruction.

Definition:MachineInstr.h:71

llvm::MachineInstr::getDesc

const MCInstrDesc & getDesc() const

Returns the target instruction descriptor of this MachineInstr.

Definition:MachineInstr.h:574

llvm::MachineInstr::eraseFromParent

void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

Definition:MachineInstr.cpp:767

llvm::MachineInstr::getOperand

const MachineOperand & getOperand(unsigned i) const

Definition:MachineInstr.h:587

llvm::MachineOperand

MachineOperand class - Representation of each machine instruction operand.

Definition:MachineOperand.h:48

llvm::MachineOperand::getImm

int64_t getImm() const

Definition:MachineOperand.h:556

llvm::MachineOperand::getReg

getReg - Returns the register number.

Definition:MachineOperand.h:369

llvm::MachineRegisterInfo

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Definition:MachineRegisterInfo.h:51

llvm::Pass::getPassName

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

Definition:Pass.cpp:81

llvm::Register

Wrapper class representing virtual and physical registers.

Definition:Register.h:19

llvm::SmallVector

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Definition:SmallVector.h:1196

llvm::StringRef

StringRef - Represent a constant reference to a string, i.e.

Definition:StringRef.h:51

llvm::X86FrameLowering

Definition:X86FrameLowering.h:28

llvm::X86InstrInfo

Definition:X86InstrInfo.h:177

llvm::X86MachineFunctionInfo

X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...

Definition:X86MachineFunctionInfo.h:58

llvm::X86MachineFunctionInfo::setHasPushSequences

void setHasPushSequences(bool HasPush)

Definition:X86MachineFunctionInfo.h:201

llvm::X86RegisterInfo

Definition:X86RegisterInfo.h:24

llvm::X86Subtarget

Definition:X86Subtarget.h:53

llvm::cl::opt_storage::getValue

DataType & getValue()

Definition:CommandLine.h:1352

llvm::cl::opt

Definition:CommandLine.h:1423

llvm::detail::DenseSetImpl::insert

std::pair< iterator, bool > insert(const ValueT &V)

Definition:DenseSet.h:213

unsigned

ErrorHandling.h

llvm_unreachable

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Definition:ErrorHandling.h:143

llvm::COFF::Exit

@ Exit

Definition:COFF.h:845

llvm::CallingConv::ID

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

Definition:CallingConv.h:24

llvm::MCID::Call

@ Call

Definition:MCInstrDesc.h:156

llvm::SPII::Store

@ Store

Definition:SparcInstrInfo.h:33

llvm::X86Disassembler::Reg

Reg

All possible values of the reg field in the ModR/M byte.

Definition:X86DisassemblerDecoder.h:621

llvm::X86::AddrBaseReg

@ AddrBaseReg

Definition:X86BaseInfo.h:29

llvm::X86::AddrScaleAmt

@ AddrScaleAmt

Definition:X86BaseInfo.h:30

llvm::X86::AddrSegmentReg

@ AddrSegmentReg

Definition:X86BaseInfo.h:34

llvm::X86::AddrDisp

@ AddrDisp

Definition:X86BaseInfo.h:32

llvm::X86::AddrIndexReg

@ AddrIndexReg

Definition:X86BaseInfo.h:31

llvm::X86::AddrNumOperands

@ AddrNumOperands

Definition:X86BaseInfo.h:36

llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip

@ Skip

llvm::cl::Hidden

@ Hidden

Definition:CommandLine.h:137

llvm::cl::init

initializer< Ty > init(const Ty &Val)

Definition:CommandLine.h:443

llvm::codeview::EncodedFramePtrReg::StackPtr

@ StackPtr

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::BuildMI

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

Definition:MachineInstrBuilder.h:373

llvm::isAligned

bool isAligned(Align Lhs, uint64_t SizeInBytes)

Checks that SizeInBytes is a multiple of the alignment.

Definition:Alignment.h:145

llvm::Log2_32

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

Definition:MathExtras.h:341

llvm::isPowerOf2_32

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

Definition:MathExtras.h:292

llvm::createX86CallFrameOptimization

FunctionPass * createX86CallFrameOptimization()

Return a pass that optimizes the code-size of x86 call sequences.

Definition:X86CallFrameOptimization.cpp:629

RegInfo

Definition:AMDGPUAsmParser.cpp:2770

llvm::Align

This struct is a compact representation of a valid (non-zero power of two) alignment.

Definition:Alignment.h:39

llvm::cl::desc

Definition:CommandLine.h:409