//===- CodeGenPrepare.cpp - Prepare a function for code generation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/Config/llvm-config.h"
#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumMemoryInstsPhiCreated,
          "Number of phis created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumMemoryInstsSelectCreated,
          "Number of select created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
          "Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

    cl::desc("Disable GC optimizations in CodeGenPrepare"));

    cl::desc("Disable select to branch conversion."));

    cl::desc("Address sinking in CGP using GEPs."));

    cl::desc("Enable sinking and/cmp into branches."));

    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

    cl::desc("Disable protection against removing loop preheaders"));

    cl::desc("Use profile info to add section prefix for hot/cold functions"));

    "profile-unknown-in-special-section", cl::Hidden,
    cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
             "profile, we cannot tell the function is cold for sure because "
             "it may be a function newly added without ever being sampled. "
             "With the flag enabled, compiler can put such profile unknown "
             "functions into a special section, so runtime system can choose "
             "to handle it in a different way than .text section, to save "
             "RAM for example. "));

    cl::desc("Use the basic-block-sections profile to determine the text "
             "section prefix for hot functions. Functions with "
             "basic-block-sections profile will be placed in `.text.hot` "
             "regardless of their FDO profile info. Other functions won't be "
             "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
             "alone."));

    cl::desc("Skip merging empty blocks if (frequency of empty block) / "
             "(frequency of destination block) is greater than this ratio"));

    cl::desc("Force store splitting no matter what the target query says."));

    cl::desc("Enable merging of redundant sexts when one is dominating "
             "the other."));

    cl::desc("Disables combining addressing modes with different parts "
             "in optimizeMemoryInst."));

    cl::desc("Allow creation of Phis in Address sinking."));

    cl::desc("Allow creation of selects in Address sinking."));

    cl::desc("Allow combining of BaseReg field in Address sinking."));

    cl::desc("Allow combining of BaseGV field in Address sinking."));

    cl::desc("Allow combining of BaseOffs field in Address sinking."));

    cl::desc("Allow combining of ScaledReg field in Address sinking."));

    cl::desc("Enable splitting large offset of GEP."));

    cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));

    cl::desc("Enable BFI update verification for CodeGenPrepare."));

    cl::desc("Enable converting phi types in CodeGenPrepare"));

    cl::desc("Least BB number of huge function."));

    cl::desc("Max number of address users to look at"));

    cl::desc("Disable elimination of dead PHI nodes."));
  enum ExtType {
    ZeroExtension, // Zero extension has been seen.
    SignExtension, // Sign extension has been seen.
    BothExtension  // This extension type is used if we saw sext after
                   // ZeroExtension had been set, or if we saw zext after
                   // SignExtension had been set. It makes the type
                   // information of a promoted instruction invalid.
  };

  enum class ModifyDT {
    NotModifyDT, // Does not modify any dominator tree.
    ModifyBBDT,  // Modifies the basic-block dominator tree.
    ModifyInstDT // Modifies the instruction order within a basic block.
                 // This usually means we move/delete/insert instructions
                 // in a basic block, so we should re-iterate the
                 // instructions in such a basic block.
  };

  class TypePromotionTransaction;

  friend class CodeGenPrepareLegacyPass;
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;

  /// As we scan instructions optimizing them, this is the next instruction
  /// to optimize. Transforms that can invalidate this should update it.

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address. The usage of WeakTrackingVH
  /// enables SunkAddrs to be treated as a cache whose entries can be
  /// invalidated if a sunken address computation has been erased.

  /// Keeps track of all instructions inserted for the current function.
  SetOfInstrs InsertedInsts;

  /// Keeps track of the type of the related instructions before their
  /// promotion for the current function.
  InstrToOrigTy PromotedInsts;

  /// Keep track of instructions removed during promotion.
  SetOfInstrs RemovedInsts;

  /// Keep track of sext chains based on their initial value.

  /// Keep track of GEPs accessing the same data structures such as structs or
  /// arrays that are candidates to be split later because of their large
  /// offsets.

  /// Keep track of new GEP bases after splitting GEPs that have large offsets.

  /// Map serial numbers to large-offset GEPs.

  /// Keep track of SExts promoted.
  ValueToSExts ValToSExtendedUses;

  /// True if the function has the OptSize attribute.

  /// DataLayout for the Function being processed.

  /// Building the dominator tree can be expensive, so we only build it
  /// lazily and update it when required.
  std::unique_ptr<DominatorTree> DT;

  /// If we encounter a huge function, we need to limit the build time.
  bool IsHugeFunc = false;

  /// FreshBBs is like a worklist; it collects the updated BBs which need
  /// to be optimized again.
  /// Note: To limit build time in this pass, when a BB is updated we only
  /// insert it into FreshBBs for huge functions.

  void releaseMemory() {
    // Clear per-function information.
    InsertedInsts.clear();
    PromotedInsts.clear();
  }
  template <typename F>
  void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
    // Substituting can cause recursive simplifications, which can invalidate
    // our iterator.  Use a WeakTrackingVH to hold onto it in case this
    // happens.
    Value *CurValue = &*CurInstIterator;
    WeakTrackingVH IterHandle(CurValue);

    f();

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurValue) {
      CurInstIterator = BB->begin();
    }
  }

  // Get the DominatorTree, building if necessary.
  DominatorTree &getDT(Function &F) {
    if (!DT)
      DT = std::make_unique<DominatorTree>(F);
    return *DT;
  }
  void removeAllAssertingVHReferences(Value *V);
  bool eliminateMostlyEmptyBlocks(Function &F);
  void eliminateMostlyEmptyBlock(BasicBlock *BB);
  bool optimizeInlineAsmInst(CallInst *CS);
  bool optimizeLoadExt(LoadInst *Load);
  bool optimizeSwitchPhiConstants(SwitchInst *SI);
  bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
  bool tryToPromoteExts(TypePromotionTransaction &TPT,
                        const SmallVectorImpl<Instruction *> &Exts,
                        SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                        unsigned CreatedInstsCost = 0);
  bool splitLargeGEPOffsets();
  bool performAddressTypePromotion(
      Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
      bool HasPromoted, TypePromotionTransaction &TPT,
      SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
  bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
  bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);

  static char ID; // Pass identification, replacement for typeid

  // FIXME: When we can selectively preserve passes, preserve the domtree.

} // end anonymous namespace

char CodeGenPrepareLegacyPass::ID = 0;
bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
  CodeGenPrepare CGP(TM);
  CGP.DL = &F.getDataLayout();
  CGP.SubtargetInfo = TM->getSubtargetImpl(F);
  CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
  CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
  CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *BBSPRWP =
      getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
  CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;

                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
  return new CodeGenPrepareLegacyPass();
}
  CodeGenPrepare CGP(TM);
  bool Changed = CGP.run(F, AM);

  DL = &F.getDataLayout();
  SubtargetInfo = TM->getSubtargetImpl(F);

  BBSectionsProfileReader =

  bool EverMadeChange = false;

  OptSize = F.hasOptSize();
  // Use the basic-block-sections profile to promote hot functions to .text.hot
  // if requested.
    F.setSectionPrefix("hot");

    // The hot attribute overwrites profile count based hotness while profile
    // counts based hotness overwrites the cold attribute.
    // This is a conservative behavior.
    if (F.hasFnAttribute(Attribute::Hot) ||
        PSI->isFunctionHotInCallGraph(&F, *BFI))
      F.setSectionPrefix("hot");
    // If PSI shows this function is not hot, we will place the function
    // into the unlikely section if (1) PSI shows this is a cold function, or
    // (2) the function has the cold attribute.
    else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
             F.hasFnAttribute(Attribute::Cold))
      F.setSectionPrefix("unlikely");
             PSI->isFunctionHotnessUnknown(F))
      F.setSectionPrefix("unknown");
  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
    while (BB != nullptr) {
      // bypassSlowDivision may create new BBs, but we don't want to reapply
      // the optimization to those blocks.

  // Get rid of @llvm.assume builtins before attempting to eliminate empty
  // blocks, since there might be blocks that only contain @llvm.assume calls
  // (plus arguments that we can get rid of).
  EverMadeChange |= eliminateAssumptions(F);

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= eliminateMostlyEmptyBlocks(F);

  ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
    EverMadeChange |= splitBranchCondition(F, ModifiedDT);

  // Split some critical edges where one of the sources is an indirect branch,
  // to help generate sane code for PHIs involving such edges.

  // If we are optimizing a huge function, we need to consider the build time,
  // because the basic algorithm's complexity is nearly O(N!).

  // Transformations above may invalidate the dominator tree and/or loop info.
  LI->analyze(getDT(F));
  bool MadeChange = true;
  bool FuncIterated = false;

      if (FuncIterated && !FreshBBs.contains(&BB))
        continue;

      ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;

      if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
        break;

      MadeChange |= Changed;

      // If the BB is updated, it may still have a chance to be optimized.
      // This usually happens during sink optimizations.
      // For example:
      //
      //     %and = and i32 %a, 4
      //     %cmp = icmp eq i32 %and, 0
      //
      // If the %cmp sinks to another BB, the %and will then have a chance to
      // sink as well.

      // For small/normal functions, we restart BB iteration if the dominator
      // tree of the Function was changed.
      if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
        break;

    // We have iterated over all the BBs in the function (this only applies to
    // huge functions).
    FuncIterated = IsHugeFunc;

    MadeChange |= mergeSExts(F);
    if (!LargeOffsetGEPMap.empty())
      MadeChange |= splitLargeGEPOffsets();
    MadeChange |= optimizePhiTypes(F);

    eliminateFallThrough(F, DT.get());

    LI->verify(getDT(F));

    // Really free removed instructions during promotion.

  EverMadeChange |= MadeChange;
  SeenChainsForSExt.clear();
  ValToSExtendedUses.clear();
  RemovedInsts.clear();
  LargeOffsetGEPMap.clear();
  LargeOffsetGEPID.clear();

    // Use a set vector to get deterministic iteration order. The order the
    // blocks are removed may affect whether or not PHI nodes in successors
    // are removed.

    // Delete the dead blocks and any of their dead successors.
    MadeChange |= !WorkList.empty();
    while (!WorkList.empty()) {

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= eliminateFallThrough(F);

    EverMadeChange |= MadeChange;
      if (auto *SP = dyn_cast<GCStatepointInst>(&I))
        Statepoints.push_back(SP);
    for (auto &I : Statepoints)
      EverMadeChange |= simplifyOffsetableRelocate(*I);

  // Do this last to clean up use-before-def scenarios introduced by other
  // preparatory transforms.
  EverMadeChange |= placeDbgValues(F);
  EverMadeChange |= placePseudoProbes(F);

  return EverMadeChange;
}
777bool CodeGenPrepare::eliminateAssumptions(
Function &
F) {
778bool MadeChange =
false;
780 CurInstIterator = BB.begin();
781while (CurInstIterator != BB.end()) {
783if (
auto *Assume = dyn_cast<AssumeInst>(
I)) {
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
/// An instruction is about to be deleted, so remove all references to it in
/// our GEP-tracking data structures.
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.
erase(V);
801 NewGEPBases.
erase(V);
803autoGEP = dyn_cast<GetElementPtrInst>(V);
809auto VecI = LargeOffsetGEPMap.
find(
GEP->getPointerOperand());
810if (VecI == LargeOffsetGEPMap.
end())
813auto &GEPVector = VecI->second;
816if (GEPVector.empty())
817 LargeOffsetGEPMap.
erase(VecI);
820// Verify BFI has been updated correctly by recomputing BFI and comparing them. 826 NewBFI.verifyMatch(*BFI);
829/// Merge basic blocks which are connected by a single edge, where one of the 830/// basic blocks has a single successor pointing to the other basic block, 831/// which has a single predecessor. 834// Scan all of the blocks in the function, except for the entry block. 835// Use a temporary array to avoid iterator being invalidated when 843auto *BB = cast_or_null<BasicBlock>(
Block);
846// If the destination block has a single pred, then this is a trivial 847// edge, just collapse it. 850// Don't merge if BB's address is taken. 851if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
854// Make an effort to skip unreachable blocks. 859if (Term && !
Term->isConditional()) {
863// Merge BB into SinglePred and delete it. 866/* PredecessorWithTwoSuccessors */false, DT);
870// Update FreshBBs to optimize the merged BB. 871 FreshBBs.
insert(SinglePred);
877// (Repeatedly) merging blocks into their predecessors can create redundant 879for (
constauto &Pred : Preds)
880if (
auto *BB = cast_or_null<BasicBlock>(Pred))
886/// Find a destination block from BB if BB is mergeable empty block. 888// If this block doesn't end with an uncond branch, ignore it. 893// If the instruction before the branch (skipping debug info) isn't a phi 894// node, then other stuff is happening here. 896if (BBI != BB->
begin()) {
898while (isa<DbgInfoIntrinsic>(BBI)) {
899if (BBI == BB->
begin())
903if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
907// Do not break infinite loops. 912if (!canMergeBlocks(BB, DestBB))
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an 919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split 920/// edges in ways that are non-optimal for isel. Start by eliminating these 921/// blocks so we can split them the way we want them. 922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(
Function &
F) {
925while (!LoopList.empty()) {
926Loop *
L = LoopList.pop_back_val();
929 Preheaders.
insert(Preheader);
932bool MadeChange =
false;
933// Copy blocks into a temporary array to avoid iterator invalidation issues 935// Note that this intentionally skips the entry block. 938// Delete phi nodes that could block deleting other empty blocks. 948BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.
count(BB)))
953 eliminateMostlyEmptyBlock(BB);
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(
BasicBlock *BB,
  // Do not delete loop preheaders if doing so would create a critical edge.
  // Loop preheaders can be good locations to spill registers. If the
  // preheader is deleted and we create a critical edge, registers may be
  // spilled in the loop body instead.

  // Skip merging if the block's successor is also a successor to any callbr
  // that leads to this block.
  // FIXME: Is this really needed? Is this a correctness issue?
    if (isa<CallBrInst>(Pred->getTerminator()) &&

  // Try to skip merging if the unique predecessor of BB is terminated by a
  // switch or indirect branch instruction, and BB is used as an incoming block
  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
  // to add COPY instructions in the predecessor of BB instead of BB (if it is
  // not merged). Note that the critical edge created by merging such blocks
  // won't be split in MachineSink because the jump table is not analyzable. By
  // keeping such an empty block (BB), ISel will place COPY instructions in BB,
  // not in the predecessor of BB.

  // We use a simple cost heuristic which determines that skipping merging is
  // profitable if the cost of skipping merging is less than the cost of
  // merging: Cost(skipping merging) < Cost(merging BB), where
  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
  // Cost(merging BB) is Freq(Pred) * Cost(Copy).
  // Assuming Cost(Copy) == Cost(Branch), we can simplify this to:
  //   Freq(Pred) / Freq(BB) > 2.
  // Note that if there are multiple empty blocks sharing the same incoming
  // value for the PHIs in the DestBB, we consider them together. In such a
  // case, Cost(merging BB) will be the sum of their frequencies.

  if (!isa<PHINode>(DestBB->begin()))
1012// Find all other incoming blocks from which incoming values of all PHIs in 1013// DestBB are the same as the ones from BB. 1015if (DestBBPred == BB)
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1022 SameIncomingValueBBs.
insert(DestBBPred);
  // See if all of BB's incoming values are the same as the value from Pred. In
  // this case, there is no reason to skip merging because COPYs are expected
  // to be placed in Pred already.
  if (SameIncomingValueBBs.count(Pred))
1034for (
auto *SameValueBB : SameIncomingValueBBs)
1035if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1040return !Limit || PredFreq <= *Limit;
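  // Worked example of the Freq(Pred) / Freq(BB) > 2 heuristic above (numbers
  // are made up for illustration, not taken from real profile data): if Pred
  // runs 1000 times and the empty block BB runs 400 times, the ratio is 2.5,
  // so Cost(merging) = 1000 * Cost(Copy) exceeds Cost(skipping) =
  // 400 * (Cost(Copy) + Cost(Branch)) = 800 * Cost(Copy) when
  // Cost(Copy) == Cost(Branch), and merging is skipped.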
1043/// Return true if we can merge BB into DestBB if there is a single 1044/// unconditional branch between them, and BB contains no other non-phi 1046bool CodeGenPrepare::canMergeBlocks(
constBasicBlock *BB,
1048// We only want to eliminate blocks whose phi nodes are used by phi nodes in 1049// the successor. If there are more complex condition (e.g. preheaders), 1050// don't mess around with them. 1054if (UI->
getParent() != DestBB || !isa<PHINode>(UI))
1056// If User is inside DestBB block and it is a PHINode then check 1057// incoming value. If incoming value is not from BB then this is 1058// a complex condition (e.g. preheaders) we want to avoid here. 1060if (
constPHINode *UPN = dyn_cast<PHINode>(UI))
1061for (
unsignedI = 0, E = UPN->getNumIncomingValues();
I != E; ++
I) {
1063if (
Insn &&
Insn->getParent() == BB &&
1064Insn->getParent() != UPN->getIncomingBlock(
I))
1071// If BB and DestBB contain any common predecessors, then the phi nodes in BB 1072// and DestBB may have conflicting incoming values for the block. If so, we 1073// can't merge the block. 1074constPHINode *DestBBPN = dyn_cast<PHINode>(DestBB->
begin());
1076returntrue;
// no conflict. 1078// Collect the preds of BB. 1080if (
constPHINode *BBPN = dyn_cast<PHINode>(BB->
begin())) {
1081// It is faster to get preds from a PHI than with pred_iterator. 1082for (
unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.
insert(BBPN->getIncomingBlock(i));
1088// Walk the preds of DestBB. 1091if (BBPreds.
count(Pred)) {
// Common predecessor? 1093constValue *V1 = PN.getIncomingValueForBlock(Pred);
1094constValue *
V2 = PN.getIncomingValueForBlock(BB);
1096// If V2 is a phi node in BB, look up what the mapped value will be. 1097if (
constPHINode *V2PN = dyn_cast<PHINode>(V2))
1098if (V2PN->getParent() == BB)
1099V2 = V2PN->getIncomingValueForBlock(Pred);
1101// If there is a conflict, bail out. 1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs. 1115auto *OldI = dyn_cast<Instruction>(Old);
1127/// Eliminate a basic block that has only phi's and an unconditional branch in 1129void CodeGenPrepare::eliminateMostlyEmptyBlock(
BasicBlock *BB) {
1136// If the destination block has a single pred, then this is a trivial edge, 1139if (SinglePred != DestBB) {
1140assert(SinglePred == BB &&
1141"Single predecessor not the same as predecessor");
1142// Merge DestBB into SinglePred/BB and delete it. 1144// Note: BB(=SinglePred) will not be deleted on this path. 1145// DestBB(=its single successor) is the one that was deleted. 1149// Update FreshBBs to optimize the merged BB. 1150 FreshBBs.
insert(SinglePred);
1151 FreshBBs.
erase(DestBB);
1157// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 1158// to handle the new incoming edges it is about to have. 1160// Remove the incoming value for BB, and remember it. 1161Value *InVal = PN.removeIncomingValue(BB,
false);
1163// Two options: either the InVal is a phi node defined in BB or it is some 1164// value that dominates BB. 1165PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166if (InValPhi && InValPhi->
getParent() == BB) {
1167// Add all of the input values of the input PHI as inputs of this phi. 1172// Otherwise, add one instance of the dominating value for each edge that 1173// we will be adding. 1175for (
unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1179 PN.addIncoming(InVal, Pred);
1184// Preserve loop Metadata. 1190// The PHIs are now updated, change everything that refers to BB to use 1191// DestBB and remove BB. 1199// Computes a map of base pointer relocation instructions to corresponding 1200// derived pointer relocation instructions given a vector of all relocate calls 1205// Collect information in two maps: one primarily for locating the base object 1206// while filling the second map; the second map is the final structure holding 1207// a mapping between Base and corresponding Derived relocate calls 1209for (
auto *ThisRelocate : AllRelocateCalls) {
1210auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1211 ThisRelocate->getDerivedPtrIndex());
1212 RelocateIdxMap.
insert(std::make_pair(K, ThisRelocate));
1214for (
auto &Item : RelocateIdxMap) {
1215 std::pair<unsigned, unsigned> Key = Item.first;
1216if (Key.first == Key.second)
1217// Base relocation: nothing to insert 1221auto BaseKey = std::make_pair(Key.first, Key.first);
1223// We're iterating over RelocateIdxMap so we cannot modify it. 1224auto MaybeBase = RelocateIdxMap.
find(BaseKey);
1225if (MaybeBase == RelocateIdxMap.
end())
1226// TODO: We might want to insert a new base object relocate and gep off 1227// that, if there are enough derived object relocates. 1230 RelocateInstMap[MaybeBase->second].push_back(
I);
1234// Accepts a GEP and extracts the operands into a vector provided they're all 1235// small integer constants 1238for (
unsigned i = 1; i <
GEP->getNumOperands(); i++) {
1239// Only accept small constant integer operands 1240auto *
Op = dyn_cast<ConstantInt>(
GEP->getOperand(i));
1241if (!
Op ||
Op->getZExtValue() > 20)
1245for (
unsigned i = 1; i <
GEP->getNumOperands(); i++)
1250// Takes a RelocatedBase (base pointer relocation instruction) and Targets to 1251// replace, computes a replacement, and affects it. 1255bool MadeChange =
false;
1256// We must ensure the relocation of derived pointer is defined after 1257// relocation of base pointer. If we find a relocation corresponding to base 1258// defined earlier than relocation of base then we move relocation of base 1259// right before found relocation. We consider only relocation in the same 1260// basic block as relocation of base. Relocations from other basic block will 1261// be skipped by optimization and we do not care about them. 1262for (
auto R = RelocatedBase->
getParent()->getFirstInsertionPt();
1263 &*R != RelocatedBase; ++R)
1264if (
auto *RI = dyn_cast<GCRelocateInst>(R))
1267 RelocatedBase->
moveBefore(RI->getIterator());
1274"Not relocating a derived object of the original base object");
1275if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1276// A duplicate relocate call. TODO: coalesce duplicates. 1280if (RelocatedBase->
getParent() != ToReplace->getParent()) {
1281// Base and derived relocates are in different basic blocks. 1282// In this case transform is only valid when base dominates derived 1283// relocate. However it would be too expensive to check dominance 1284// for each such relocate, so we skip the whole transformation. 1289auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1290if (!Derived || Derived->getPointerOperand() !=
Base)
1297// Create a Builder and replace the target callsite with a gep 1299"Should always have one since it's not a terminator");
  // Insert after RelocatedBase
  // If gc_relocate does not match the actual type, cast it to the right type.
  // In theory, there must be a bitcast after gc_relocate if the type does not
  // match, and we should reuse it to get the derived pointer. But it could be
  // cases like this:
  //   %g1 = call coldcc i8 addrspace(1)*
  //         @llvm.experimental.gc.relocate.p1i8(...) br label %merge
  //   %g2 = call coldcc i8 addrspace(1)*
  //         @llvm.experimental.gc.relocate.p1i8(...) br label %merge
  //   %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
  //   %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
  //
  // In this case, we cannot find the bitcast anymore. So we insert a new
  // bitcast, whether or not there is already one. In this way, we can handle
  // all cases, and the extra bitcast should be optimized away in later passes.
  Value *ActualRelocatedBase = RelocatedBase;
1329 ActualRelocatedBase =
1333 Builder.
CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1336// If the newly generated derived pointer's type does not match the original 1337// derived pointer's type, cast the new derived pointer to match it. Same 1338// reasoning as above. 1339Value *ActualReplacement = Replacement;
1340if (Replacement->
getType() != ToReplace->getType()) {
1345 ToReplace->eraseFromParent();
1355// %ptr = gep %base + 15 1356// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 1357// %base' = relocate(%tok, i32 4, i32 4) 1358// %ptr' = relocate(%tok, i32 4, i32 5) 1364// %ptr = gep %base + 15 1365// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 1366// %base' = gc.relocate(%tok, i32 4, i32 4) 1367// %ptr' = gep %base' + 15 1370bool MadeChange =
false;
1372for (
auto *U :
I.users())
1374// Collect all the relocate calls associated with a statepoint 1377// We need at least one base pointer relocation + one derived pointer 1378// relocation to mangle 1379if (AllRelocateCalls.
size() < 2)
1382// RelocateInstMap is a mapping from the base relocate instruction to the 1383// corresponding derived relocate instructions 1386if (RelocateInstMap.
empty())
1389for (
auto &Item : RelocateInstMap)
1390// Item.first is the RelocatedBase to offset against 1391// Item.second is the vector of Targets to replace 1396/// Sink the specified cast instruction into its user blocks. 1400 /// InsertedCasts - Only insert a cast in each block once. 1403bool MadeChange =
false;
1406Use &TheUse = UI.getUse();
1409// Figure out which BB this cast is used in. For PHI's this is the 1410// appropriate predecessor block. 1413 UserBB = PN->getIncomingBlock(TheUse);
1416// Preincrement use iterator so we don't invalidate it. 1419// The first insertion point of a block containing an EH pad is after the 1420// pad. If the pad is the user, we cannot sink the cast past the pad. 1424// If the block selected to receive the cast is an EH pad that does not 1425// allow non-PHI instructions before the terminator, we can't sink the 1430// If this user is in the same block as the cast, don't change the cast. 1434// If we have already inserted a cast into this block, use it. 1435CastInst *&InsertedCast = InsertedCasts[UserBB];
1440 InsertedCast = cast<CastInst>(CI->
clone());
1444// Replace a use of the cast with a use of the new cast. 1445 TheUse = InsertedCast;
1450// If we removed all uses, nuke the cast. 1460/// If the specified cast instruction is a noop copy (e.g. it's casting from 1461/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to 1462/// reduce the number of virtual registers that must be created and coalesced. 1464/// Return true if any changes are made. 1467// Sink only "cheap" (or nop) address-space casts. This is a weaker condition 1468// than sinking only nop casts, but is helpful on some platforms. 1469if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1471 ASC->getDestAddressSpace()))
1475// If this is a noop copy, 1479// This is an fp<->int conversion? 1483// If this is an extension, it will be a zero or sign extension, which 1488// If these values will be promoted, find out what they will be promoted 1489// to. This helps us consider truncates on PPC as noop copies when they 1498// If, after promotion, these are the same types, this is a noop copy. 1505// Match a simple increment by constant operation. Note that if a sub is 1506// matched, the step is negated (as if the step had been canonicalized to 1507// an add, even though we leave the instruction alone.) 1511match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1515match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1523/// If given \p PN is an inductive variable with value IVInc coming from the 1524/// backedge, and on each iteration it gets increased by Step, return pair 1525/// <IVInc, Step>. Otherwise, return std::nullopt. 1526static std::optional<std::pair<Instruction *, Constant *>>
1529if (!L || L->getHeader() != PN->
getParent() || !L->getLoopLatch())
1533if (!IVInc || LI->
getLoopFor(IVInc->getParent()) != L)
1538return std::make_pair(IVInc, Step);
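// Illustrative use of the helpers above (hand-written IR, not from a test):
// for a canonical loop with
//   %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add nuw i64 %iv, 1
// the returned pair is <%iv.next, i64 1>; if the increment were
//   %iv.next = sub i64 %iv, 2
// the reported step would be -2, as if the sub had been canonicalized to an
// add, even though the instruction itself is left alone.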
1543auto *
I = dyn_cast<Instruction>(V);
1550if (
auto *PN = dyn_cast<PHINode>(
LHS))
1552return IVInc->first ==
I;
1556bool CodeGenPrepare::replaceMathCmpWithIntrinsic(
BinaryOperator *BO,
1564assert(L &&
"L should not be null after isIVIncrement()");
1565// Do not risk on moving increment into a child loop. 1566if (LI->getLoopFor(
Cmp->getParent()) != L)
1569// Finally, we need to ensure that the insert point will dominate all 1570// existing uses of the increment. 1572auto &DT = getDT(*BO->
getParent()->getParent());
1574// If we're moving up the dom tree, all uses are trivially dominated. 1575// (This is the common case for code produced by LSR.) 1578// Otherwise, special case the single use in the phi recurrence. 1581if (BO->
getParent() !=
Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1582// We used to use a dominator tree here to allow multi-block optimization. 1583// But that was problematic because: 1584// 1. It could cause a perf regression by hoisting the math op into the 1586// 2. It could cause a perf regression by creating a value that was live 1587// across multiple blocks and increasing register pressure. 1588// 3. Use of a dominator tree could cause large compile-time regression. 1589// This is because we recompute the DT on every change in the main CGP 1590// run-loop. The recomputing is probably unnecessary in many cases, so if 1591// that was fixed, using a DT here would be ok. 1593// There is one important particular case we still want to handle: if BO is 1594// the IV increment. Important properties that make it profitable: 1595// - We can speculate IV increment anywhere in the loop (as long as the 1596// indvar Phi is its only user); 1597// - Upon computing Cmp, we effectively compute something equivalent to the 1598// IV increment (despite it loops differently in the IR). So moving it up 1599// to the cmp point does not really increase register pressure. 1603// We allow matching the canonical IR (add X, C) back to (usubo X, -C). 1604if (BO->
getOpcode() == Instruction::Add &&
1605 IID == Intrinsic::usub_with_overflow) {
1606assert(isa<Constant>(Arg1) &&
"Unexpected input for usubo");
1610// Insert at the first instruction of the pair. 1613// If BO is an XOR, it is not guaranteed that it comes after both inputs to 1614// the overflow intrinsic are defined. 1615if ((BO->
getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1620assert(InsertPt !=
nullptr &&
"Parent block did not contain cmp or binop");
1623Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1624if (BO->
getOpcode() != Instruction::Xor) {
1625Value *Math = Builder.CreateExtractValue(MathOV, 0,
"math");
1629"Patterns with XOr should use the BO only in the compare");
1630Value *OV = Builder.CreateExtractValue(MathOV, 1,
"ov");
1632Cmp->eraseFromParent();
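  // Net effect, sketched on hand-written IR (not from a test):
  //   %s  = add i64 %a, %b
  //   %ov = icmp ult i64 %s, %a       ; canonical unsigned-overflow check
  // becomes a single @llvm.uadd.with.overflow.i64 call, with %s and %ov
  // replaced by extractvalue of its two results.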
1637/// Match special-case patterns that check for unsigned add overflow. 1640// Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) 1641// Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) 1642Value *
A = Cmp->getOperand(0), *
B = Cmp->getOperand(1);
1644// We are not expecting non-canonical/degenerate code. Just bail out. 1645if (isa<Constant>(
A))
1650B = ConstantInt::get(
B->getType(), 1);
1656// Check the users of the variable operand of the compare looking for an add 1657// with the adjusted constant. 1658for (
User *U :
A->users()) {
1660Add = cast<BinaryOperator>(U);
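// Hand-written illustration of the special cases matched above (not taken
// from a test):
//   %c  = icmp eq i32 %a, -1     ; overflows iff %a is the max value
//   %a1 = add i32 %a, 1
// becomes
//   %m  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 1)
//   %a1 = extractvalue { i32, i1 } %m, 0
//   %c  = extractvalue { i32, i1 } %m, 1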
1667/// Try to combine the compare into a call to the llvm.uadd.with.overflow 1668/// intrinsic. Return true if any changes were made. 1669bool CodeGenPrepare::combineToUAddWithOverflow(
CmpInst *Cmp,
1670 ModifyDT &ModifiedDT) {
1671bool EdgeCase =
false;
1677// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases. 1678A =
Add->getOperand(0);
1679B =
Add->getOperand(1);
1685Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1688// We don't want to move around uses of condition values this late, so we 1689// check if it is legal to create the call to the intrinsic in the basic 1690// block containing the icmp. 1691if (
Add->getParent() !=
Cmp->getParent() && !
Add->hasOneUse())
1694if (!replaceMathCmpWithIntrinsic(
Add,
A,
B, Cmp,
1695 Intrinsic::uadd_with_overflow))
1698// Reset callers - do not crash by iterating over a dead instruction. 1699 ModifiedDT = ModifyDT::ModifyInstDT;
1703bool CodeGenPrepare::combineToUSubWithOverflow(
CmpInst *Cmp,
1704 ModifyDT &ModifiedDT) {
1705// We are not expecting non-canonical/degenerate code. Just bail out. 1707if (isa<Constant>(
A) && isa<Constant>(
B))
1710// Convert (A u> B) to (A u< B) to simplify pattern matching. 1716// Convert special-case: (A == 0) is the same as (A u< 1). 1718B = ConstantInt::get(
B->getType(), 1);
1721// Convert special-case: (A != 0) is the same as (0 u< A). 1729// Walk the users of a variable operand of a compare looking for a subtract or 1730// add with that same operand. Also match the 2nd operand of the compare to 1731// the add/sub, but that may be a negated constant operand of an add. 1732Value *CmpVariableOperand = isa<Constant>(
A) ?
B :
A;
1734for (
User *U : CmpVariableOperand->
users()) {
1735// A - B, A u< B --> usubo(A, B) 1737 Sub = cast<BinaryOperator>(U);
1741// A + (-C), A u< C (canonicalized form of (sub A, C)) 1742constAPInt *CmpC, *AddC;
1745 Sub = cast<BinaryOperator>(U);
1758 Cmp, Intrinsic::usub_with_overflow))
1761// Reset callers - do not crash by iterating over a dead instruction. 1762 ModifiedDT = ModifyDT::ModifyInstDT;
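  // Hand-written illustration of the two patterns handled above (not from a
  // test): both
  //   %d = sub i32 %a, %b              %d = add i32 %a, -42
  //   %c = icmp ult i32 %a, %b         %c = icmp ult i32 %a, 42
  // can be rewritten into one @llvm.usub.with.overflow.i32 call whose two
  // results replace %d and %c.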
1766/// Sink the given CmpInst into user blocks to reduce the number of virtual 1767/// registers that must be created and coalesced. This is a clear win except on 1768/// targets with multiple condition code registers (PowerPC), where it might 1769/// lose; some adjustment may be wanted there. 1771/// Return true if any changes are made. 1776// Avoid sinking soft-FP comparisons, since this can move them into a loop. 1780// Only insert a cmp in each block once. 1783bool MadeChange =
false;
1786Use &TheUse = UI.getUse();
1789// Preincrement use iterator so we don't invalidate it. 1792// Don't bother for PHI nodes. 1793if (isa<PHINode>(
User))
1796// Figure out which BB this cmp is used in. 1800// If this user is in the same block as the cmp, don't change the cmp. 1804// If we have already inserted a cmp into this block, use it. 1805CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1811 Cmp->getOperand(0), Cmp->getOperand(1),
"");
1813// Propagate the debug info. 1817// Replace a use of the cmp with a use of the new cmp. 1818 TheUse = InsertedCmp;
1823// If we removed all uses, nuke the cmp. 1824if (Cmp->use_empty()) {
1825 Cmp->eraseFromParent();
/// For a pattern like:
///   DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
///   ...
///   br DomCond, TrueBB, CmpBB
/// CmpBB: (with DomBB being the single predecessor)
///   Cmp = icmp eq CmpOp0, CmpOp1
///
/// it would take two comparisons on targets where the lowering of icmp sgt/slt
/// differs from the lowering of icmp eq (PowerPC). This function tries to
/// convert 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0,
/// CmpOp1'. After that, DomCond and Cmp can use the same comparison, saving
/// one compare.
///
/// Return true if any changes are made.
  // If icmp eq has users other than BranchInst and SelectInst, converting it
  // to icmp slt/sgt would introduce more redundant LLVM IR.
  for (User *U : Cmp->users()) {
1863if (isa<BranchInst>(U))
1865if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1870// This is a cheap/incomplete check for dominance - just match a single 1871// predecessor with a conditional branch. 1877// We want to ensure that the only way control gets to the comparison of 1878// interest is that a less/greater than comparison on the same operands is 1884if (CmpBB != FalseBB)
1887Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1894// Convert the equality comparison to the opposite of the dominating 1895// comparison and swap the direction for all branch/select users. 1896// We have conceptually converted: 1897// Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; 1899// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; 1900// And similarly for branches. 1901for (
User *U : Cmp->users()) {
1902if (
auto *BI = dyn_cast<BranchInst>(U)) {
1907if (
auto *SI = dyn_cast<SelectInst>(U)) {
1910 SI->swapProfMetadata();
1919/// Many architectures use the same instruction for both subtract and cmp. Try 1920/// to swap cmp operands to match subtract operations to allow for CSE. 1922Value *Op0 = Cmp->getOperand(0);
1923Value *Op1 = Cmp->getOperand(1);
1925 isa<Constant>(Op1) || Op0 == Op1)
1928// If a subtract already has the same operands as a compare, swapping would be 1929// bad. If a subtract has the same operands as a compare but in reverse order, 1930// then swapping is good. 1932unsigned NumInspected = 0;
1934// Avoid walking many users. 1935if (++NumInspected > 128)
1943if (GoodToSwap > 0) {
1944 Cmp->swapOperands();
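  // For example (illustrative only): with "%s = sub i32 %x, %y" elsewhere in
  // the function, "icmp ult i32 %y, %x" is swapped to "icmp ugt i32 %x, %y" so
  // that the compare and the subtract use their operands in the same order and
  // can be CSE'd on targets where sub and cmp share an instruction.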
1952FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1956// Don't fold if the target offers free fabs and the predicate is legal. 1963// Reverse the canonicalization if it is a FP class test 1964auto ShouldReverseTransform = [](
FPClassTest ClassTest) {
1967auto [ClassVal, ClassTest] =
1973if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1978 Cmp->replaceAllUsesWith(IsFPClass);
1986Value *Incr, *RemAmt;
1987// NB: If RemAmt is a power of 2 it *should* have been transformed by now. 1991Value *AddInst, *AddOffset;
1992// Find out loop increment PHI. 1993auto *PN = dyn_cast<PHINode>(Incr);
1998// Search through a NUW add on top of the loop increment. 2004 PN = dyn_cast<PHINode>(V0);
2008 PN = dyn_cast<PHINode>(V1);
2016// This isn't strictly necessary, what we really need is one increment and any 2017// amount of initial values all being the same. 2018if (PN->getNumIncomingValues() != 2)
2021// Only trivially analyzable loops. 2023if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2026// Req that the remainder is in the loop 2027if (!L->contains(Rem))
  // Only works if the remainder amount is a loop invariant.
  if (!L->isLoopInvariant(RemAmt))
2034// Is the PHI a loop increment? 2039// We need remainder_amount % increment_amount to be zero. Increment of one 2040// satisfies that without any special logic and is overwhelmingly the common 2045// Need the increment to not overflow. 2049// Set output variables. 2052 AddInstOut = AddInst;
2053 AddOffsetOut = AddOffset;
2060// for(i = Start; i < End; ++i) 2061// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant; 2065// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant; 2066// for(i = Start; i < End; ++i, ++rem) 2067// Rem = rem == RemAmtLoopInvariant ? 0 : Rem; 2072Value *AddOffset, *RemAmt, *AddInst;
2075 AddOffset, LoopIncrPN))
2078// Only non-constant remainder as the extra IV is probably not profitable 2081// Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If 2082// we can rule out register pressure and ensure this `urem` is executed each 2083// iteration, its probably profitable to handle the const case as well. 2085// Potential TODO(2): Should we have a check for how "nested" this remainder 2086// operation is? The new code runs every iteration so if the remainder is 2087// guarded behind unlikely conditions this might not be worth it. 2093// If we have add create initial value for remainder. 2094// The logic here is: 2095// (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant 2097// Only proceed if the expression simplifies (otherwise we can't fully 2098// optimize out the urem). 2100assert(AddOffset &&
"We found an add but missing values");
2101// Without dom-condition/assumption cache we aren't likely to get much out 2102// of a context instruction. 2105/*IsNUW=*/true, *
DL);
2110// If we can't fully optimize out the `rem`, skip this transform. 2115// Create new remainder with induction variable. 2124// `(add (urem x, y), 1)` is always nuw. 2130 NewRem->
addIncoming(Start, L->getLoopPreheader());
2133// Insert all touched BBs. 2135 FreshBBs.
insert(L->getLoopLatch());
2142 cast<Instruction>(AddInst)->eraseFromParent();
2146bool CodeGenPrepare::optimizeURem(
Instruction *Rem) {
2152/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. 2153/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the 2154/// result cannot be zero. 2163auto *
II = cast<IntrinsicInst>(Cmp->getOperand(0));
2167 Cmp->setOperand(1, ConstantInt::get(
II->getType(), 2));
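// For example (hand-written IR, assuming the ctpop operand is known non-zero):
//   %p = call i32 @llvm.ctpop.i32(i32 %x)
//   %c = icmp eq i32 %p, 1
// is rewritten to
//   %c = icmp ult i32 %p, 2
// which some targets lower more cheaply than the equality test.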
2177bool CodeGenPrepare::optimizeCmp(
CmpInst *Cmp, ModifyDT &ModifiedDT) {
2181if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2184if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2202/// Duplicate and sink the given 'and' instruction into user blocks where it is 2203/// used in a compare to allow isel to generate better code for targets where 2204/// this operation can be combined. 2206/// Return true if any changes are made. 2208 SetOfInstrs &InsertedInsts) {
2209// Double-check that we're not trying to optimize an instruction that was 2210// already optimized by some other part of this pass. 2211assert(!InsertedInsts.count(AndI) &&
2212"Attempting to optimize already optimized and instruction");
2213 (void)InsertedInsts;
2215// Nothing to do for single use in same basic block. 2220// Try to avoid cases where sinking/duplicating is likely to increase register 2227for (
auto *U : AndI->
users()) {
2230// Only sink 'and' feeding icmp with 0. 2231if (!isa<ICmpInst>(
User))
2235if (!CmpC || !CmpC->
isZero())
2245// Push the 'and' into the same block as the icmp 0. There should only be 2246// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any 2247// others, so we don't need to keep track of which BBs we insert into. 2250Use &TheUse = UI.getUse();
2253// Preincrement use iterator so we don't invalidate it. 2258// Keep the 'and' in the same place if the use is already in the same block. 2264// Propagate the debug info. 2267// Replace a use of the 'and' with a use of the new 'and'. 2268 TheUse = InsertedAnd;
2273// We removed all uses, nuke the and. 2278/// Check if the candidates could be combined with a shift instruction, which 2280/// 1. Truncate instruction 2281/// 2. And instruction and the imm is a mask of the low bits: 2282/// imm & (imm+1) == 0 2284if (!isa<TruncInst>(
User)) {
2285if (
User->getOpcode() != Instruction::And ||
2291if ((Cimm & (Cimm + 1)).getBoolValue())
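// Worked example of the low-bit-mask check above (values chosen for
// illustration): Cimm = 0x0F gives 0x0F & 0x10 == 0, so it is a mask of the
// low four bits and the 'and' can combine with a shift into a bit-extract;
// Cimm = 0x0E gives 0x0E & 0x0F == 0x0E != 0, so it is rejected.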
2297/// Sink both shift and truncate instruction to the use of truncate's BB. 2304auto *TruncI = cast<TruncInst>(
User);
2305bool MadeChange =
false;
2308 TruncE = TruncI->user_end();
2309 TruncUI != TruncE;) {
2311Use &TruncTheUse = TruncUI.getUse();
2312Instruction *TruncUser = cast<Instruction>(*TruncUI);
2313// Preincrement use iterator so we don't invalidate it. 2321// If the use is actually a legal node, there will not be an 2322// implicit truncate. 2323// FIXME: always querying the result type is just an 2324// approximation; some nodes' legality is determined by the 2325// operand or other means. There's no good way to find out though. 2330// Don't bother for PHI nodes. 2331if (isa<PHINode>(TruncUser))
2336if (UserBB == TruncUserBB)
2340CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2342if (!InsertedShift && !InsertedTrunc) {
2346if (ShiftI->
getOpcode() == Instruction::AShr)
2348 BinaryOperator::CreateAShr(ShiftI->
getOperand(0), CI,
"");
2351 BinaryOperator::CreateLShr(ShiftI->
getOperand(0), CI,
"");
2358// It will go ahead of any debug-info. 2359 TruncInsertPt.setHeadBit(
true);
2360assert(TruncInsertPt != TruncUserBB->
end());
2364 InsertedTrunc->
insertBefore(*TruncUserBB, TruncInsertPt);
2365 InsertedTrunc->
setDebugLoc(TruncI->getDebugLoc());
2369 TruncTheUse = InsertedTrunc;
2375/// Sink the shift *right* instruction into user blocks if the uses could 2376/// potentially be combined with this shift instruction and generate BitExtract 2377/// instruction. It will only be applied if the architecture supports BitExtract 2378/// instruction. Here is an example: 2380/// %x.extract.shift = lshr i64 %arg1, 32 2382/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 2386/// %x.extract.shift.1 = lshr i64 %arg1, 32 2387/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 2389/// CodeGen will recognize the pattern in BB2 and generate BitExtract 2391/// Return true if any changes are made. 2397 /// Only insert instructions in each block once. 2402bool MadeChange =
false;
2405Use &TheUse = UI.getUse();
2407// Preincrement use iterator so we don't invalidate it. 2410// Don't bother for PHI nodes. 2411if (isa<PHINode>(
User))
2419if (UserBB == DefBB) {
2420// If the shift and truncate instruction are in the same BB. The use of 2421// the truncate(TruncUse) may still introduce another truncate if not 2422// legal. In this case, we would like to sink both shift and truncate 2423// instruction to the BB of TruncUse. 2426// i64 shift.result = lshr i64 opnd, imm 2427// trunc.result = trunc shift.result to i16 2430// ----> We will have an implicit truncate here if the architecture does 2431// not have i16 compare. 2432// cmp i16 trunc.result, opnd2 2434if (isa<TruncInst>(
User) &&
2436// If the type of the truncate is legal, no truncate will be 2437// introduced in other basic blocks. 2444// If we have already inserted a shift into this block, use it. 2447if (!InsertedShift) {
2451if (ShiftI->
getOpcode() == Instruction::AShr)
2453 BinaryOperator::CreateAShr(ShiftI->
getOperand(0), CI,
"");
2456 BinaryOperator::CreateLShr(ShiftI->
getOperand(0), CI,
"");
2463// Replace a use of the shift with a use of the new shift. 2464 TheUse = InsertedShift;
2467// If we removed all uses, or there are none, nuke the shift. 2477/// If counting leading or trailing zeros is an expensive operation and a zero 2478/// input is defined, add a check for zero to avoid calling the intrinsic. 2480/// We want to transform: 2481/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) 2485/// %cmpz = icmp eq i64 %A, 0 2486/// br i1 %cmpz, label %cond.end, label %cond.false 2488/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) 2489/// br label %cond.end 2491/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] 2493/// If the transform is performed, return true and set ModifiedDT to true. 2500// If a zero input is undefined, it doesn't make sense to despeculate that. 2504// If it's cheap to speculate, there's nothing to do. 2511// Only handle legal scalar cases. Anything else requires too much work. 2513if (Ty->
isVectorTy() || SizeInBits >
DL->getLargestLegalIntTypeSizeInBits())
2516// Bail if the value is never zero. 2521// The intrinsic will be sunk behind a compare against zero and branch. 2525 FreshBBs.
insert(CallBlock);
2527// Create another block after the count zero intrinsic. A PHI will be added 2528// in this block to select the result of the intrinsic or the bit-width 2529// constant if the input to the intrinsic is zero. 2531// Any debug-info after CountZeros should not be included. 2532 SplitPt.setHeadBit(
true);
2535 FreshBBs.
insert(EndBlock);
2537// Update the LoopInfo. The new blocks are in the same loop as the start 2540 L->addBasicBlockToLoop(CallBlock, LI);
2541 L->addBasicBlockToLoop(EndBlock, LI);
2544// Set up a builder to create a compare, conditional branch, and PHI. 2549// Replace the unconditional branch that was created by the first split with 2550// a compare against zero and a conditional branch. 2552// Avoid introducing branch on poison. This also replaces the ctz operand. 2559// Create a PHI in the end block to select either the output of the intrinsic 2560// or the bit width of the operand. 2568// We are explicitly handling the zero case, so we can set the intrinsic's 2569// undefined zero argument to 'true'. This will also prevent reprocessing the 2570// intrinsic; we only despeculate when a zero input is defined. 2572 ModifiedDT = ModifyDT::ModifyBBDT;
2576bool CodeGenPrepare::optimizeCallInst(
CallInst *CI, ModifyDT &ModifiedDT) {
  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
2586// Avoid processing instructions out of order, which could cause 2587// reuse before a value is defined. 2591// Sink address computing for memory operands into the block. 2592if (optimizeInlineAsmInst(CI))
2596// Align the pointer arguments to this call if the target thinks it's a good 2601for (
auto &Arg : CI->
args()) {
2602// We want to align both objects whose address is used directly and 2603// objects whose address is used in casts and GEPs, though it only makes 2604// sense for GEPs if the offset is a multiple of the desired alignment and 2605// if size - offset meets the size threshold. 2606if (!Arg->getType()->isPointerTy())
2609 cast<PointerType>(Arg->getType())->getAddressSpace()),
2616if ((AI = dyn_cast<AllocaInst>(Val)) && AI->
getAlign() < PrefAlign &&
2619// Global variables can only be aligned if they are defined in this 2620// object (i.e. they are uniquely initialized in this object), and 2621// over-aligning global variables that have an explicit section is 2630// If this is a memcpy (or similar) then we may be able to improve the 2635if (!MIDestAlign || DestAlign > *MIDestAlign)
2636MI->setDestAlignment(DestAlign);
2638MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2640if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2641 MTI->setSourceAlignment(SrcAlign);
2645// If we have a cold call site, try to sink addressing computation into the 2646// cold block. This interacts with our handling for loads and stores to 2647// ensure that we can fold all uses of a potential addressing computation 2648// into their uses. TODO: generalize this to work over profiling data 2651for (
auto &Arg : CI->
args()) {
2652if (!Arg->getType()->isPointerTy())
2654unsigned AS = Arg->getType()->getPointerAddressSpace();
2655if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2661switch (
II->getIntrinsicID()) {
2664case Intrinsic::assume:
2666case Intrinsic::allow_runtime_check:
2667case Intrinsic::allow_ubsan_check:
2668case Intrinsic::experimental_widenable_condition: {
2669// Give up on future widening opportunities so that we can fold away dead 2670// paths and merge blocks before going into block-local instruction 2672if (
II->use_empty()) {
2673II->eraseFromParent();
2677 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2682case Intrinsic::objectsize:
2684case Intrinsic::is_constant:
2686case Intrinsic::aarch64_stlxr:
2687case Intrinsic::aarch64_stxr: {
2692// Sink a zext feeding stlxr/stxr before it, so it can be folded into it. 2694// Mark this instruction as "inserted by CGP", so that other 2695// optimizations don't touch it. 2696 InsertedInsts.insert(ExtVal);
2700case Intrinsic::launder_invariant_group:
2701case Intrinsic::strip_invariant_group: {
2702Value *ArgVal =
II->getArgOperand(0);
2703auto it = LargeOffsetGEPMap.
find(
II);
2704if (it != LargeOffsetGEPMap.
end()) {
2705// Merge entries in LargeOffsetGEPMap to reflect the RAUW. 2706// Make sure not to have to deal with iterator invalidation 2707// after possibly adding ArgVal to LargeOffsetGEPMap. 2708auto GEPs = std::move(it->second);
2709 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2714II->eraseFromParent();
2717case Intrinsic::cttz:
2718case Intrinsic::ctlz:
2719// If counting zeros is expensive, try to avoid it. 2722case Intrinsic::fshl:
2723case Intrinsic::fshr:
2724return optimizeFunnelShift(
II);
2725case Intrinsic::dbg_assign:
2726case Intrinsic::dbg_value:
2727return fixupDbgValue(
II);
2728case Intrinsic::masked_gather:
2729return optimizeGatherScatterInst(
II,
II->getArgOperand(0));
2730case Intrinsic::masked_scatter:
2731return optimizeGatherScatterInst(
II,
II->getArgOperand(1));
2737while (!PtrOps.
empty()) {
2740if (optimizeMemoryInst(
II, PtrVal, AccessTy, AS))
2745// From here on out we're working with named functions. 2750// Lower all default uses of _chk calls. This is very similar 2751// to what InstCombineCalls does, but here we are only lowering calls 2752// to fortified library functions (e.g. __memcpy_chk) that have the default 2753// "don't know" as the objectsize. Anything else should be left alone. 2756if (
Value *V = Simplifier.optimizeCall(CI, Builder)) {
  // SCCP may have propagated, among other things, C++ static variables across
  // calls. If this happens to be the case, we may want to undo it in order to
  // avoid redundant pointer computation of the constant, as the function method
  // returning the constant needs to be executed anyway.
  if (!F->getReturnType()->isPointerTy())
    return false;

  for (auto &BB : *F) {
    if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
      if (!UniformValue)
        UniformValue = V;
      else if (V != UniformValue)
        return false;
  if (Callee->hasExactDefinition()) {
    bool MadeChange = false;
    auto *I = dyn_cast<Instruction>(U.getUser());
    // Limit to the same basic block to avoid extending the call-site live
    // range, which otherwise could increase register pressure.

  if (const auto *II = dyn_cast<IntrinsicInst>(CI))
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      return true;

  if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
    switch (LF) {
    case LibFunc_strncpy:
    case LibFunc_strncat:
      return true;
/// Look for opportunities to duplicate return instructions to the predecessor
/// to enable tail call optimizations. The case it is currently looking for is
/// the following one. Known intrinsics or library functions that may be tail
/// called are taken into account as well.
///
///   bb0:
///     %tmp0 = tail call i32 @f0()
///     br label %return
///   bb1:
///     %tmp1 = tail call i32 @f1()
///     br label %return
///   bb2:
///     %tmp2 = tail call i32 @f2()
///     br label %return
///   return:
///     %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///     ret i32 %retval
///
/// =>
///
///   bb0:
///     %tmp0 = tail call i32 @f0()
///     ret i32 %tmp0
///   bb1:
///     %tmp1 = tail call i32 @f1()
///     ret i32 %tmp1
///   bb2:
///     %tmp2 = tail call i32 @f2()
///     ret i32 %tmp2
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
                                                ModifyDT &ModifiedDT) {
  assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");

  BCI = dyn_cast<BitCastInst>(V);
  EVI = dyn_cast<ExtractValueInst>(V);
  PN = dyn_cast<PHINode>(V);

  auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
    const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
    return II->getIntrinsicID() == Intrinsic::lifetime_end;
  };
  auto isFakeUse = [&FakeUses](const Instruction *Inst) {
    if (auto *II = dyn_cast<IntrinsicInst>(Inst);
        II && II->getIntrinsicID() == Intrinsic::fake_use) {
      // Record the instruction so it can be preserved when the exit block is
      // removed. Do not preserve the fake use that uses the result of the
      // call.
      // Do not copy fake uses that use the result of a PHI node.
      // FIXME: If we do want to copy the fake use into the return blocks, we
      // have to figure out which of the PHI node operands to use for each
      // copy.
      if (!isa<PHINode>(II->getOperand(0))) {

  // Make sure there are no instructions between the first instruction
  // and the return.
  // Skip over debug and the bitcast.
  while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(&*BI) ||
  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.

  // Record the call instructions so we can insert any fake uses
  // that need to be preserved before them.

  // Look through bitcasts.
  CallInst *CI = dyn_cast<CallInst>(IncomingVal);

  // Make sure the phi value is indeed produced by the tail call.

  // Consider the cases in which the phi value is indirectly produced by
  // the tail call, for example when encountering memset(), memmove(),
  // strcpy(), whose return value may have been optimized out. In such
  // cases, the value needs to be the first function argument.
  //
  //   tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
  //   %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
  CI = dyn_cast_or_null<CallInst>(

      if (!VisitedBBs.insert(Pred).second)
        continue;
      if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {

      // Either we return void or the return value must be the first
      // argument of a known intrinsic or library function.
      if (!V || isa<UndefValue>(V) ||
  for (const auto &TailCallBB : TailCallBBs) {
    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());

    // Duplicate the return into TailCallBB.
    BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
    BFI->setBlockFreq(BB,
                      (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
    ModifiedDT = ModifyDT::ModifyBBDT;

  // If we eliminated all predecessors of the block, delete the block now.

  // Copy the fake uses found in the original return block to all blocks
  // that contain tail calls.
  for (auto *CI : CallInsts) {
    for (const auto *FakeUse : FakeUses) {
      auto *ClonedInst = FakeUse->clone();
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

namespace {

/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
  Value *OriginalValue = nullptr;

  enum FieldName {
    NoField = 0x00,
    BaseRegField = 0x01,
    BaseGVField = 0x02,
    BaseOffsField = 0x04,
    ScaledRegField = 0x08,
    ScaleField = 0x10,
    MultipleFields = 0xff
  };
    // First check that the types are the same on each field, as differing types
    // is something we can't cope with later on.
    if (BaseReg && other.BaseReg &&
        BaseReg->getType() != other.BaseReg->getType())
      return MultipleFields;
    if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
      return MultipleFields;
    if (ScaledReg && other.ScaledReg &&
        ScaledReg->getType() != other.ScaledReg->getType())
      return MultipleFields;

    // Conservatively reject 'inbounds' mismatches.
    if (InBounds != other.InBounds)
      return MultipleFields;

    // Check each field to see if it differs.
    unsigned Result = NoField;
    if (BaseReg != other.BaseReg)
      Result |= BaseRegField;
    if (BaseGV != other.BaseGV)
      Result |= BaseGVField;
    if (BaseOffs != other.BaseOffs)
      Result |= BaseOffsField;
    if (ScaledReg != other.ScaledReg)
      Result |= ScaledRegField;
    // Don't count 0 as being a different scale, because that actually means
    // unscaled (which will already be counted by having no ScaledReg).
    if (Scale && other.Scale && Scale != other.Scale)
      Result |= ScaleField;

    if (llvm::popcount(Result) > 1)
      return MultipleFields;
    return static_cast<FieldName>(Result);
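  // Illustrative note (not part of the upstream code): compare() reports the
  // single field in which two ExtAddrModes differ, or MultipleFields when
  // they diverge in more than one way. Roughly, assuming two modes built
  // from hypothetical values %a and %b:
  //
  //   ExtAddrMode A, B;          // both empty
  //   A.BaseReg = %a;            // A = { BaseReg: %a }
  //   B.BaseReg = %b;            // B = { BaseReg: %b }
  //   A.compare(B) == BaseRegField
  //
  //   B.BaseOffs = 16;           // now B also differs in the offset
  //   A.compare(B) == MultipleFields
  //
  // AddressingModeCombiner below only merges modes whose difference collapses
  // to a single such field.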
  // An AddrMode is trivial if it involves no calculation i.e. it is just a base
  // value.
  //
  // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
  // trivial if at most one of these terms is nonzero, except that BaseGV and
  // BaseReg both being zero actually means a null pointer value, which we
  // consider to be 'non-zero' here.

      return ConstantInt::get(IntPtrTy, BaseOffs);

  void SetCombinedField(FieldName Field, Value *V,
                        const SmallVectorImpl<ExtAddrMode> &AddrModes) {
    switch (Field) {
    case ExtAddrMode::BaseRegField:
    case ExtAddrMode::BaseGVField:
      // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
      // in the BaseReg field.
    case ExtAddrMode::ScaledRegField:
      // If we have a mix of scaled and unscaled addrmodes then we want scale
      // to be the scale and not zero.
    case ExtAddrMode::BaseOffsField:
      // The offset is no longer a constant, so it goes in ScaledReg with a
      // scale of one.

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    bool NeedPlus = false;
      BaseGV->printAsOperand(OS, /*PrintType=*/false);
      OS << (NeedPlus ? " + " : "") << BaseOffs;
      OS << (NeedPlus ? " + " : "") << "Base:";
      BaseReg->printAsOperand(OS, /*PrintType=*/false);
      OS << (NeedPlus ? " + " : "") << Scale << "*";
}
} // end anonymous namespace

namespace {

/// This class provides transaction based operation on the IR.
/// Every change made through this class is recorded in the internal state and
/// can be undone (rollback) until commit is called.
/// CGP does not check if instructions could be speculatively executed when
/// moved. Preserving the original location would pessimize the debugging
/// experience, as well as negatively impact the quality of sample PGO.
class TypePromotionTransaction {
  /// This represents the common interface of the individual transaction.
  /// Each class implements the logic for doing one specific modification on
  /// the IR via the TypePromotionTransaction.
  class TypePromotionAction {
  protected:
    /// The Instruction modified.
    Instruction *Inst;

  public:
    /// Constructor of the action.
    /// The constructor performs the related action on the IR.
    TypePromotionAction(Instruction *Inst) : Inst(Inst) {}

    virtual ~TypePromotionAction() = default;

    /// Undo the modification done by this action.
    /// When this method is called, the IR must be in the same state as it was
    /// before this action was applied.
    /// \pre Undoing the action works if and only if the IR is in the exact same
    /// state as it was directly after this action was applied.
    virtual void undo() = 0;

    /// Advocate every change made by this action.
    /// When the results on the IR of the action are to be kept, it is important
    /// to call this function, otherwise hidden information may be kept forever.
    virtual void commit() {
      // Nothing to be done, this action is not doing anything.
    }
  };

  /// Utility to remember the position of an instruction.
  class InsertionHandler {
3264 /// Position of an instruction. 3265 /// Either an instruction: 3266 /// - Is the first in a basic block: BB is used. 3267 /// - Has a previous instruction: PrevInst is used. 3272 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3274 /// Remember whether or not the instruction had a previous instruction. 3275bool HasPrevInstruction;
3278 /// Record the position of \p Inst. 3280 HasPrevInstruction = (Inst != &*(Inst->
getParent()->begin()));
3283// Record where we would have to re-insert the instruction in the sequence 3284// of DbgRecords, if we ended up reinserting. 3288if (HasPrevInstruction) {
3295 /// Insert \p Inst at the recorded position. 3297if (HasPrevInstruction) {
3309 Inst->
getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3313 /// Move an instruction before another. 3314classInstructionMoveBefore :
public TypePromotionAction {
3315 /// Original position of the instruction. 3316 InsertionHandler Position;
3319 /// Move \p Inst before \p Before. 3321 : TypePromotionAction(Inst), Position(Inst) {
3327 /// Move the instruction back to its original position. 3328void undo()
override{
3330 Position.insert(Inst);
3334 /// Set the operand of an instruction with a new value. 3335classOperandSetter :
public TypePromotionAction {
3336 /// Original operand of the instruction. 3339 /// Index of the modified instruction. 3343 /// Set \p Idx operand of \p Inst with \p NewVal. 3345 : TypePromotionAction(Inst),
Idx(
Idx) {
3347 <<
"for:" << *Inst <<
"\n" 3348 <<
"with:" << *NewVal <<
"\n");
3353 /// Restore the original value of the instruction. 3354void undo()
override{
3356 <<
"for: " << *Inst <<
"\n" 3357 <<
"with: " << *Origin <<
"\n");
3362 /// Hide the operands of an instruction. 3363 /// Do as if this instruction was not using any of its operands. 3364classOperandsHider :
public TypePromotionAction {
3365 /// The list of original operands. 3369 /// Remove \p Inst from the uses of the operands of \p Inst. 3370 OperandsHider(
Instruction *Inst) : TypePromotionAction(Inst) {
3373 OriginalValues.
reserve(NumOpnds);
3374for (
unsigned It = 0; It < NumOpnds; ++It) {
3375// Save the current operand. 3379// We could use OperandSetter here, but that would imply an overhead 3380// that we are not willing to pay. 3385 /// Restore the original list of uses. 3386void undo()
override{
3388for (
unsigned It = 0, EndIt = OriginalValues.
size(); It != EndIt; ++It)
3393 /// Build a truncate instruction. 3394classTruncBuilder :
public TypePromotionAction {
3398 /// Build a truncate instruction of \p Opnd producing a \p Ty 3400 /// trunc Opnd to Ty. 3401 TruncBuilder(
Instruction *Opnd,
Type *Ty) : TypePromotionAction(Opnd) {
3403 Builder.SetCurrentDebugLocation(
DebugLoc());
3404 Val = Builder.CreateTrunc(Opnd, Ty,
"promoted");
3408 /// Get the built value. 3409Value *getBuiltValue() {
return Val; }
3411 /// Remove the built instruction. 3412void undo()
override{
3414if (
Instruction *IVal = dyn_cast<Instruction>(Val))
3415 IVal->eraseFromParent();
3419 /// Build a sign extension instruction. 3420classSExtBuilder :
public TypePromotionAction {
3424 /// Build a sign extension instruction of \p Opnd producing a \p Ty 3426 /// sext Opnd to Ty. 3428 : TypePromotionAction(InsertPt) {
3430 Val = Builder.CreateSExt(Opnd, Ty,
"promoted");
3434 /// Get the built value. 3435Value *getBuiltValue() {
return Val; }
3437 /// Remove the built instruction. 3438void undo()
override{
3440if (
Instruction *IVal = dyn_cast<Instruction>(Val))
3441 IVal->eraseFromParent();
3445 /// Build a zero extension instruction. 3446classZExtBuilder :
public TypePromotionAction {
3450 /// Build a zero extension instruction of \p Opnd producing a \p Ty 3452 /// zext Opnd to Ty. 3454 : TypePromotionAction(InsertPt) {
3456 Builder.SetCurrentDebugLocation(
DebugLoc());
3457 Val = Builder.CreateZExt(Opnd, Ty,
"promoted");
3461 /// Get the built value. 3462Value *getBuiltValue() {
return Val; }
3464 /// Remove the built instruction. 3465void undo()
override{
3467if (
Instruction *IVal = dyn_cast<Instruction>(Val))
3468 IVal->eraseFromParent();
3472 /// Mutate an instruction to another type. 3473classTypeMutator :
public TypePromotionAction {
3474 /// Record the original type. 3478 /// Mutate the type of \p Inst into \p NewTy. 3480 : TypePromotionAction(Inst), OrigTy(Inst->
getType()) {
3481LLVM_DEBUG(
dbgs() <<
"Do: MutateType: " << *Inst <<
" with " << *NewTy
3486 /// Mutate the instruction back to its original type. 3487void undo()
override{
3488LLVM_DEBUG(
dbgs() <<
"Undo: MutateType: " << *Inst <<
" with " << *OrigTy
3494 /// Replace the uses of an instruction by another instruction. 3495classUsesReplacer :
public TypePromotionAction {
3496 /// Helper structure to keep track of the replaced uses. 3497structInstructionAndIdx {
3498 /// The instruction using the instruction. 3501 /// The index where this instruction is used for Inst. 3508 /// Keep track of the original uses (pair Instruction, Index). 3510 /// Keep track of the debug users. 3512 /// And non-instruction debug-users too. 3515 /// Keep track of the new value so that we can undo it by replacing 3516 /// instances of the new value with the original value. 3522 /// Replace all the use of \p Inst by \p New. 3524 : TypePromotionAction(Inst),
New(
New) {
3525LLVM_DEBUG(
dbgs() <<
"Do: UsersReplacer: " << *Inst <<
" with " << *New
3527// Record the original uses. 3530 OriginalUses.
push_back(InstructionAndIdx(UserI,
U.getOperandNo()));
3532// Record the debug uses separately. They are not in the instruction's 3533// use list, but they are replaced by RAUW. 3536// Now, we can replace the uses. 3540 /// Reassign the original uses of Inst to Inst. 3541void undo()
override{
3543for (InstructionAndIdx &
Use : OriginalUses)
3544Use.Inst->setOperand(
Use.Idx, Inst);
3545// RAUW has replaced all original uses with references to the new value, 3546// including the debug uses. Since we are undoing the replacements, 3547// the original debug uses must also be reinstated to maintain the 3548// correctness and utility of debug value instructions. 3549for (
auto *DVI : DbgValues)
3550 DVI->replaceVariableLocationOp(New, Inst);
3551// Similar story with DbgVariableRecords, the non-instruction 3552// representation of dbg.values. 3554 DVR->replaceVariableLocationOp(New, Inst);
3558 /// Remove an instruction from the IR. 3559classInstructionRemover :
public TypePromotionAction {
3560 /// Original position of the instruction. 3561 InsertionHandler Inserter;
3563 /// Helper structure to hide all the link to the instruction. In other 3564 /// words, this helps to do as if the instruction was removed. 3565 OperandsHider Hider;
3567 /// Keep track of the uses replaced, if any. 3568 UsesReplacer *Replacer =
nullptr;
3570 /// Keep track of instructions removed. 3571 SetOfInstrs &RemovedInsts;
3574 /// Remove all reference of \p Inst and optionally replace all its 3576 /// \p RemovedInsts Keep track of the instructions removed by this Action. 3577 /// \pre If !Inst->use_empty(), then New != nullptr 3578 InstructionRemover(
Instruction *Inst, SetOfInstrs &RemovedInsts,
3580 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3581 RemovedInsts(RemovedInsts) {
3583 Replacer =
new UsesReplacer(Inst, New);
3585 RemovedInsts.insert(Inst);
3586 /// The instructions removed here will be freed after completing 3587 /// optimizeBlock() for all blocks as we need to keep track of the 3588 /// removed instructions during promotion. 3592 ~InstructionRemover()
override{
delete Replacer; }
3594 InstructionRemover &operator=(
const InstructionRemover &other) =
delete;
3595 InstructionRemover(
const InstructionRemover &other) =
delete;
3597 /// Resurrect the instruction and reassign it to the proper uses if 3598 /// new value was provided when build this action. 3599void undo()
override{
3600LLVM_DEBUG(
dbgs() <<
"Undo: InstructionRemover: " << *Inst <<
"\n");
3601 Inserter.insert(Inst);
3605 RemovedInsts.erase(Inst);
public:
  /// Restoration point.
  /// The restoration point is a pointer to an action instead of an iterator
  /// because the iterator may be invalidated but not the pointer.
  using ConstRestorationPt = const TypePromotionAction *;

  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
      : RemovedInsts(RemovedInsts) {}

  /// Advocate every change made in that transaction. Return true if any change
  /// happened.
  bool commit();

  /// Undo all the changes made after the given point.
  void rollback(ConstRestorationPt Point);

  /// Get the current restoration point.
  ConstRestorationPt getRestorationPoint() const;

  /// \name API for IR modification with state keeping to support rollback.
  /// @{
  /// Same as Instruction::setOperand.
  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);

  /// Same as Instruction::eraseFromParent.
  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);

  /// Same as Value::replaceAllUsesWith.
  void replaceAllUsesWith(Instruction *Inst, Value *New);

  /// Same as Value::mutateType.
  void mutateType(Instruction *Inst, Type *NewTy);

  /// Same as IRBuilder::createTrunc.
  Value *createTrunc(Instruction *Opnd, Type *Ty);

  /// Same as IRBuilder::createSExt.
  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);

  /// Same as IRBuilder::createZExt.
  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
  /// @}

private:
  /// The ordered list of actions made so far.
  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;

  SetOfInstrs &RemovedInsts;
};
} // end anonymous namespace

void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
                                          Value *NewVal) {
  Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
      Inst, Idx, NewVal));
}

void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                Value *NewVal) {
  Actions.push_back(
      std::make_unique<TypePromotionTransaction::InstructionRemover>(
          Inst, RemovedInsts, NewVal));
}

void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
                                                  Value *New) {
  Actions.push_back(
      std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
}

void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
  Actions.push_back(
      std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}

Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
  Value *Val = Ptr->getBuiltValue();
  Actions.push_back(std::move(Ptr));
  return Val;
}

Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
                                            Type *Ty) {
  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
  Value *Val = Ptr->getBuiltValue();
  Actions.push_back(std::move(Ptr));
  return Val;
}

Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
                                            Type *Ty) {
  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
  Value *Val = Ptr->getBuiltValue();
  Actions.push_back(std::move(Ptr));
  return Val;
}
TypePromotionTransaction::ConstRestorationPt
TypePromotionTransaction::getRestorationPoint() const {
  return !Actions.empty() ? Actions.back().get() : nullptr;
}

bool TypePromotionTransaction::commit() {
  for (std::unique_ptr<TypePromotionAction> &Action : Actions)
    Action->commit();
  bool Modified = !Actions.empty();
  Actions.clear();
  return Modified;
}

void TypePromotionTransaction::rollback(
    TypePromotionTransaction::ConstRestorationPt Point) {
  while (!Actions.empty() && Point != Actions.back().get()) {
    std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
    Curr->undo();
  }
}
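// Illustrative sketch (not upstream code): a typical use of the transaction is
// to take a restoration point, apply speculative rewrites through the
// transaction, and either commit or roll back depending on profitability.
// Names such as InsertPt, Opnd, WideTy, OldExt and ProfitableToKeep below are
// hypothetical:
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   Value *Promoted = TPT.createSExt(InsertPt, Opnd, WideTy);
//   TPT.replaceAllUsesWith(OldExt, Promoted);
//   if (ProfitableToKeep)
//     TPT.commit();                 // keep the recorded actions
//   else
//     TPT.rollback(LastKnownGood);  // undo everything after the point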
/// A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.

  /// This is the addressing mode that we're building up. This is
  /// part of the return value of this addressing mode matching stuff.

  /// The instructions inserted by other CodeGenPrepare optimizations.
  const SetOfInstrs &InsertedInsts;

  /// A map from the instructions to their type before promotion.
  InstrToOrigTy &PromotedInsts;

  /// The ongoing transaction where every action should be registered.
  TypePromotionTransaction &TPT;

  // A GEP which has too large offset to be folded into the addressing mode.
  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;

  /// This is set to true when we should not do profitability checks.
  /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
  bool IgnoreProfitability;

  /// True if we are optimizing for size.
  bool OptSize;

  AddressingModeMatcher(
      const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
      TypePromotionTransaction &TPT,
      : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
        DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
        AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
        InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
        LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
    IgnoreProfitability = false;
  }
  /// Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy. This returns a list of involved
  /// instructions in AddrModeInsts.
  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
  /// optimizations.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  /// \p The ongoing transaction where every action should be registered.
      InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,

    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
                                         AccessTy, AS, MemoryInst, Result,
                                         InsertedInsts, PromotedInsts, TPT,
                                         LargeOffsetGEP, OptSize, PSI, BFI)

  bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
                          bool *MovedAway = nullptr);
  bool isProfitableToFoldIntoAddressingMode(Instruction *I,
  bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
  bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                             Value *PromotedOperand) const;
/// An iterator for PhiNodeSet.
class PhiNodeSetIterator {
  PhiNodeSet *const Set;
  size_t CurrentIndex = 0;

public:
  /// The constructor. Start should point to either a valid element, or be equal
  /// to the size of the underlying SmallVector of the PhiNodeSet.
  PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
  PhiNodeSetIterator &operator++();
/// Keeps a set of PHINodes.
///
/// This is a minimal set implementation for a specific use case:
/// It is very fast when there are very few elements, but also provides good
/// performance when there are many. It is similar to SmallPtrSet, but also
/// provides iteration by insertion order, which is deterministic and stable
/// across runs. It is also similar to SmallSetVector, but provides removing
/// elements in O(1) time. This is achieved by not actually removing the element
/// from the underlying vector, so comes at the cost of using more memory, but
/// that is fine, since PhiNodeSets are used as short lived objects.
class PhiNodeSet {
  friend class PhiNodeSetIterator;

  using iterator = PhiNodeSetIterator;

  /// Keeps the elements in the order of their insertion in the underlying
  /// vector. To achieve constant time removal, it never deletes any element.

  /// Keeps the elements in the underlying set implementation. This (and not the
  /// NodeList defined above) is the source of truth on whether an element
  /// is actually in the collection.

  /// Points to the first valid (not deleted) element when the set is not empty
  /// and the value is not zero. Equals to the size of the underlying vector
  /// when the set is empty. When the value is 0, as in the beginning, the
  /// first element may or may not be valid.
  size_t FirstValidElement = 0;

public:
  /// Inserts a new element to the collection.
  /// \returns true if the element is actually added, i.e. was not in the
  /// collection before the operation.
  bool insert(PHINode *Ptr) {
    if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
      NodeList.push_back(Ptr);
      return true;
    }
    return false;
  }

  /// Removes the element from the collection.
  /// \returns whether the element is actually removed, i.e. was in the
  /// collection before the operation.
  bool erase(PHINode *Ptr) {
    if (NodeMap.erase(Ptr)) {
      SkipRemovedElements(FirstValidElement);
      return true;
    }
    return false;
  }

  /// Removes all elements and clears the collection.
  void clear() {
    NodeMap.clear();
    NodeList.clear();
    FirstValidElement = 0;
  }

  /// \returns an iterator that will iterate the elements in the order of
  /// insertion.
  iterator begin() {
    if (FirstValidElement == 0)
      SkipRemovedElements(FirstValidElement);
    return PhiNodeSetIterator(this, FirstValidElement);
  }

  /// \returns an iterator that points to the end of the collection.
  iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }

  /// Returns the number of elements in the collection.
  size_t size() const { return NodeMap.size(); }

  /// \returns 1 if the given element is in the collection, and 0 if otherwise.

private:
  /// Updates the CurrentIndex so that it will point to a valid element.
  ///
  /// If the element of NodeList at CurrentIndex is valid, it does not
  /// change it. If there are no more valid elements, it updates CurrentIndex
  /// to point to the end of the NodeList.
  void SkipRemovedElements(size_t &CurrentIndex) {
    while (CurrentIndex < NodeList.size()) {
      auto it = NodeMap.find(NodeList[CurrentIndex]);
      // If the element has been deleted and added again later, NodeMap will
      // point to a different index, so CurrentIndex will still be invalid.
      if (it != NodeMap.end() && it->second == CurrentIndex)
        break;
      ++CurrentIndex;
    }
  }
};
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
    : Set(Set), CurrentIndex(Start) {}

PHINode *PhiNodeSetIterator::operator*() const {
  assert(CurrentIndex < Set->NodeList.size() &&
         "PhiNodeSet access out of range");
  return Set->NodeList[CurrentIndex];
}

PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
  assert(CurrentIndex < Set->NodeList.size() &&
         "PhiNodeSet access out of range");
  ++CurrentIndex;
  Set->SkipRemovedElements(CurrentIndex);
  return *this;
}

bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
  return CurrentIndex == RHS.CurrentIndex;
}

bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
  return !((*this) == RHS);
}
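// Illustrative sketch (not upstream code): PhiNodeSet behaves like a set that
// also remembers insertion order, with O(1) erase. Assuming two PHINode
// pointers P1 and P2, and a hypothetical visit() helper:
//
//   PhiNodeSet Set;
//   Set.insert(P1);        // true, newly added
//   Set.insert(P2);        // true
//   Set.insert(P1);        // false, already present
//   Set.erase(P1);         // true; P2 keeps its insertion-order position
//   for (PHINode *P : Set)
//     visit(P);            // visits P2 only
//
// Erased slots stay in the underlying vector; the iterator skips them via
// SkipRemovedElements().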
3973/// Keep track of simplification of Phi nodes. 3974/// Accept the set of all phi nodes and erase phi node from this set 3975/// if it is simplified. 3976classSimplificationTracker {
3979// Tracks newly created Phi nodes. The elements are iterated by insertion 3981 PhiNodeSet AllPhiNodes;
3982// Tracks newly created Select nodes. 3990auto SV = Storage.
find(V);
3991if (SV == Storage.
end())
4001while (!WorkList.
empty()) {
4005if (
auto *PI = dyn_cast<Instruction>(
P))
4007for (
auto *U : PI->users())
4010 PI->replaceAllUsesWith(V);
4011if (
auto *
PHI = dyn_cast<PHINode>(PI))
4012 AllPhiNodes.erase(
PHI);
4013if (
auto *
Select = dyn_cast<SelectInst>(PI))
4015 PI->eraseFromParent();
4025while (OldReplacement !=
From) {
4027 To = dyn_cast<PHINode>(OldReplacement);
4028 OldReplacement = Get(
From);
4030assert(To && Get(To) == To &&
"Replacement PHI node is already replaced.");
4032From->replaceAllUsesWith(To);
4033 AllPhiNodes.erase(
From);
4034From->eraseFromParent();
4037 PhiNodeSet &newPhiNodes() {
return AllPhiNodes; }
4039void insertNewPhi(
PHINode *PN) { AllPhiNodes.insert(PN); }
4043unsigned countNewPhiNodes()
const{
return AllPhiNodes.size(); }
4045unsigned countNewSelectNodes()
const{
return AllSelectNodes.
size(); }
4047void destroyNewNodes(
Type *CommonType) {
4048// For safe erasing, replace the uses with dummy value first. 4050for (
auto *
I : AllPhiNodes) {
4051I->replaceAllUsesWith(Dummy);
4052I->eraseFromParent();
4054 AllPhiNodes.clear();
4055for (
auto *
I : AllSelectNodes) {
4056I->replaceAllUsesWith(Dummy);
4057I->eraseFromParent();
4059 AllSelectNodes.clear();
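// Illustrative note (not upstream code): the tracker records, for every
// placeholder PHI/select the combiner creates, the value it eventually
// simplifies to, and Get() follows that chain. For example, if a new phi %p
// is later found to be equivalent to an existing value %b, then after the
// corresponding ReplacePhi/Simplify step Get(%p) yields %b, and
// destroyNewNodes() can drop any placeholders that turned out to be
// unnecessary.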
/// A helper class for combining addressing modes.
class AddressingModeCombiner {
  typedef std::pair<PHINode *, PHINode *> PHIPair;

private:
  /// The addressing modes we've collected.

  /// The field in which the AddrModes differ, when we have more than one.
  ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;

  /// Are the AddrModes that we have all just equal to their original values?
  bool AllAddrModesTrivial = true;

  /// Common Type for all different fields in addressing modes.
  Type *CommonType = nullptr;

  /// SimplifyQuery for simplifyInstruction utility.

  /// Original Address.

  /// Common value among addresses
  Value *CommonValue = nullptr;

public:
  AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
      : SQ(_SQ), Original(OriginalValue) {}

  ~AddressingModeCombiner() { eraseCommonValueIfDead(); }

  /// Get the combined AddrMode

  /// Add a new AddrMode if it's compatible with the AddrModes we already
  /// have.
  /// \return True iff we succeeded in doing so.
  bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
    // Take note of if we have any non-trivial AddrModes, as we need to detect
    // when all AddrModes are trivial as then we would introduce a phi or select
    // which just duplicates what's already there.
    AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();

    // If this is the first addrmode then everything is fine.
    if (AddrModes.empty()) {
      AddrModes.emplace_back(NewAddrMode);
      return true;
    }

    // Figure out how different this is from the other address modes, which we
    // can do just by comparing against the first one given that we only care
    // about the cumulative difference.
    ExtAddrMode::FieldName ThisDifferentField =
        AddrModes[0].compare(NewAddrMode);
    if (DifferentField == ExtAddrMode::NoField)
      DifferentField = ThisDifferentField;
    else if (DifferentField != ThisDifferentField)
      DifferentField = ExtAddrMode::MultipleFields;

    // If NewAddrMode differs in more than one dimension we cannot handle it.
    bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;

    // If the Scale field is different then we reject.
    CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;

    // We also must reject the case when the base offset is different and the
    // scale reg is not null; we cannot handle this case because the merge of
    // different offsets would have to be used as the ScaleReg.
    CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||

    // We also must reject the case when GV is different and a BaseReg is
    // installed, because we want to use the base reg as a merge of GV values.
    CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
                              !NewAddrMode.HasBaseReg);
    // Even if NewAddrMode is the same, we still need to collect it, because
    // the original value is different. And later we will need all original
    // values as anchors when finding the common Phi node.

  /// Combine the addressing modes we've collected into a single
  /// addressing mode.
  /// \return True iff we successfully combined them or we only had one so
  /// didn't need to combine them anyway.
  bool combineAddrModes() {
    // If we have no AddrModes then they can't be combined.
    if (AddrModes.size() == 0)
      return false;

    // A single AddrMode can trivially be combined.
    if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
      return true;

    // If the AddrModes we collected are all just equal to the value they are
    // derived from then combining them wouldn't do anything useful.
    if (AllAddrModesTrivial)
      return false;

    if (!addrModeCombiningAllowed())
      return false;

    // Build a map between <original value, basic block where we saw it> to
    // value of base register.
    // Bail out if there is no common type.
    FoldAddrToValueMapping Map;
    if (!initializeMap(Map))
      return false;

    CommonValue = findCommon(Map);
    if (CommonValue)
      AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
    return CommonValue != nullptr;
  }
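  // Illustrative example (not from the upstream sources): combineAddrModes()
  // turns per-predecessor addressing modes that differ in a single field into
  // one mode anchored on a new common value. Roughly, for
  //
  //   bb1:
  //     %p1 = getelementptr i8, ptr %b1, i64 40
  //     br label %merge
  //   bb2:
  //     %p2 = getelementptr i8, ptr %b2, i64 40
  //     br label %merge
  //   merge:
  //     %p = phi ptr [ %p1, %bb1 ], [ %p2, %bb2 ]
  //     %v = load i32, ptr %p
  //
  // the two modes differ only in the base register (%b1 vs %b2), so a new
  //   %b = phi ptr [ %b1, %bb1 ], [ %b2, %bb2 ]
  // becomes the CommonValue and the load can fold [%b + 40] as its address.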
  /// `CommonValue` may be a placeholder inserted by us.
  /// If the placeholder is not used, we should remove this dead instruction.
  void eraseCommonValueIfDead() {
    if (CommonValue && CommonValue->getNumUses() == 0)
      if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
        CommonInst->eraseFromParent();
  }

  /// Initialize Map with anchor values. For every address seen
  /// we set the value of the differing field in this address.
  /// At the same time we find a common type for the differing field that we
  /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
  /// Return false if there is no common type found.
  bool initializeMap(FoldAddrToValueMapping &Map) {
    // Keep track of keys where the value is null. We will need to replace it
    // with constant null when we know the common type.
    for (auto &AM : AddrModes) {
      Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
      if (DV) {
        if (CommonType && CommonType != DV->getType())
          return false;
        CommonType = DV->getType();
      }
      Map[AM.OriginalValue] = DV;
    }
    assert(CommonType && "At least one non-null value must be!");
    for (auto *V : NullValue)
      Map[V] = Constant::getNullValue(CommonType);
    return true;
  }
  /// We have a mapping between value A and another value B where B was a field
  /// in the addressing mode represented by A. Also we have an original value C
  /// representing an address we start with. Traversing from C through phis and
  /// selects we ended up with A's in a map. This utility function tries to find
  /// a value V which is a field in addressing mode C such that, traversing
  /// through phi nodes and selects, we end up in the corresponding values B in
  /// the map. The utility will create new Phi/Selects if needed.
  // The simple example looks as follows:
  //   p = phi [p1, BB1], [p2, BB2]
  // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
  Value *findCommon(FoldAddrToValueMapping &Map) {
    // Tracks the simplification of newly created phi nodes. The reason we use
    // this mapping is because we will add new created Phi nodes in AddrToBase.
    // Simplification of Phi nodes is recursive, so some Phi node may
    // be simplified after we added it to AddrToBase. In reality this
    // simplification is possible only if original phi/selects were not
    // simplified yet.
    // Using this mapping we can find the current value in AddrToBase.
    SimplificationTracker ST(SQ);

    // First step, DFS to create PHI nodes for all intermediate blocks.
    // Also fill traverse order for the second step.
    InsertPlaceholders(Map, TraverseOrder, ST);

    // Second step, fill new nodes by merged values and simplify if possible.
    FillPlaceholders(Map, TraverseOrder, ST);

    if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
      ST.destroyNewNodes(CommonType);
      return nullptr;
    }

    // Now we'd like to match the new Phi nodes to existing ones.
    unsigned PhiNotMatchedCount = 0;
    if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
      ST.destroyNewNodes(CommonType);
      return nullptr;
    }

    NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
    NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
  }
4284 /// Try to match PHI node to Candidate. 4285 /// Matcher tracks the matched Phi nodes. 4288 PhiNodeSet &PhiNodesToMatch) {
4295while (!WorkList.
empty()) {
4297if (!Visited.
insert(Item).second)
4299// We iterate over all incoming values to Phi to compare them. 4300// If values are different and both of them Phi and the first one is a 4301// Phi we added (subject to match) and both of them is in the same basic 4302// block then we can match our pair if values match. So we state that 4303// these values match and add it to work list to verify that. 4304for (
auto *
B : Item.first->blocks()) {
4305Value *FirstValue = Item.first->getIncomingValueForBlock(
B);
4306Value *SecondValue = Item.second->getIncomingValueForBlock(
B);
4307if (FirstValue == SecondValue)
4310PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4311PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4313// One of them is not Phi or 4314// The first one is not Phi node from the set we'd like to match or 4315// Phi nodes from different basic blocks then 4316// we will not be able to match. 4317if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4321// If we already matched them then continue. 4322if (Matcher.
count({FirstPhi, SecondPhi}))
4324// So the values are different and does not match. So we need them to 4325// match. (But we register no more than one match per PHI node, so that 4326// we won't later try to replace them twice.) 4327if (MatchedPHIs.
insert(FirstPhi).second)
4328 Matcher.
insert({FirstPhi, SecondPhi});
4329// But me must check it. 4330 WorkList.
push_back({FirstPhi, SecondPhi});
  /// For the given set of PHI nodes (in the SimplificationTracker) try
  /// to find their equivalents.
  /// Returns false if this matching fails and creation of new Phi is disabled.
  bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
                   unsigned &PhiNotMatchedCount) {
    // Matched and PhiNodesToMatch iterate their elements in a deterministic
    // order, so the replacements (ReplacePhi) are also done in a deterministic
    // order.
    PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
    while (PhiNodesToMatch.size()) {
      // Add us, if no Phi nodes in the basic block we do not match.
      WillNotMatch.clear();

      // Traverse all Phis until we find an equivalent or fail to do so.
      bool IsMatched = false;
      for (auto &P : PHI->getParent()->phis()) {
        // Skip new Phi nodes.
        if (PhiNodesToMatch.count(&P))
          continue;
        if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
          break;
        // If it does not match, collect all Phi nodes from the matcher.
        // If we end up with no match, then all these Phi nodes will not match
        // later.
        for (auto M : Matched)
          WillNotMatch.insert(M.first);
        Matched.clear();
      }
      if (IsMatched) {
        // Replace all matched values and erase them.
        for (auto MV : Matched)
          ST.ReplacePhi(MV.first, MV.second);
        Matched.clear();
        continue;
      }
      // If we are not allowed to create new nodes then bail out.
      if (!AllowNewPhiNodes)
        return false;
      // Just remove all seen values in matcher. They will not match anything.
      PhiNotMatchedCount += WillNotMatch.size();
      for (auto *P : WillNotMatch)
        PhiNodesToMatch.erase(P);
    }
    return true;
  }
4386 /// Fill the placeholders with values from predecessors and simplify them. 4387void FillPlaceholders(FoldAddrToValueMapping &Map,
4389 SimplificationTracker &ST) {
4390while (!TraverseOrder.
empty()) {
4392assert(
Map.contains(Current) &&
"No node to fill!!!");
4396// CurrentValue also must be Select. 4397auto *CurrentSelect = cast<SelectInst>(Current);
4398auto *TrueValue = CurrentSelect->getTrueValue();
4399assert(
Map.contains(TrueValue) &&
"No True Value!");
4400Select->setTrueValue(
ST.Get(Map[TrueValue]));
4401auto *FalseValue = CurrentSelect->getFalseValue();
4402assert(
Map.contains(FalseValue) &&
"No False Value!");
4403Select->setFalseValue(
ST.Get(Map[FalseValue]));
4405// Must be a Phi node then. 4406auto *
PHI = cast<PHINode>(V);
4407// Fill the Phi node with values from predecessors. 4409Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(
B);
4410assert(
Map.contains(PV) &&
"No predecessor Value!");
4411PHI->addIncoming(
ST.Get(Map[PV]),
B);
4414Map[Current] =
ST.Simplify(V);
4418 /// Starting from original value recursively iterates over def-use chain up to 4419 /// known ending values represented in a map. For each traversed phi/select 4420 /// inserts a placeholder Phi or Select. 4421 /// Reports all new created Phi/Select nodes by adding them to set. 4422 /// Also reports and order in what values have been traversed. 4423void InsertPlaceholders(FoldAddrToValueMapping &Map,
4425 SimplificationTracker &ST) {
4427assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4428"Address must be a Phi or Select node");
4431while (!Worklist.
empty()) {
4433// if it is already visited or it is an ending value then skip it. 4434if (
Map.contains(Current))
4438// CurrentValue must be a Phi node or select. All others must be covered 4440if (
SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4441// Is it OK to get metadata from OrigSelect?! 4442// Create a Select placeholder with dummy value. 4445 CurrentSelect->getName(),
4446 CurrentSelect->getIterator(), CurrentSelect);
4449// We are interested in True and False values. 4450 Worklist.
push_back(CurrentSelect->getTrueValue());
4451 Worklist.
push_back(CurrentSelect->getFalseValue());
4453// It must be a Phi node then. 4454PHINode *CurrentPhi = cast<PHINode>(Current);
4459ST.insertNewPhi(
PHI);
4465bool addrModeCombiningAllowed() {
4468switch (DifferentField) {
4471case ExtAddrMode::BaseRegField:
4473case ExtAddrMode::BaseGVField:
4475case ExtAddrMode::BaseOffsField:
4477case ExtAddrMode::ScaledRegField:
4482}
} // end anonymous namespace

/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode. Just process that directly.
  if (Scale == 1)
    return matchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.

  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;

  // If the new address isn't legal, bail out.

  // It was legal, so commit it.

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode. If we found an available IV increment, do
  // not go any further: we can reuse it and cannot eliminate it.
  Value *AddLHS = nullptr;
  if (isa<Instruction>(ScaleReg) && // not a constant expr.
    TestAddrMode.InBounds = false;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
  // Restore status quo.

  // If this is an add recurrence with a constant step, return the increment
  // instruction and the canonicalized step.
  auto GetConstantStep =
      [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
    auto *PN = dyn_cast<PHINode>(V);

    // TODO: The result of the intrinsics above is two's complement. However,
    // when the IV inc is expressed as add or sub, iv.next is potentially a
    // poison value. If it has nuw or nsw flags, we need to make sure that these
    // flags are inferrable at the point of the memory instruction. Otherwise we
    // are replacing a well-defined two's-complement computation with poison.
    // Currently, to avoid the potentially complex analysis needed to prove
    // this, we reject such cases.
    if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
      if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
        return std::nullopt;
    if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
      return std::make_pair(IVInc->first, ConstantStep->getValue());
    return std::nullopt;
  };
  // Try to account for the following special case:
  // 1. ScaleReg is an inductive variable;
  // 2. We use it with non-zero offset;
  // 3. IV's increment is available at the point of memory instruction.
  //
  // In this case, we may reuse the IV increment instead of the IV Phi to
  // achieve the following advantages:
  // 1. If IV step matches the offset, we will have no need in the offset;
  // 2. Even if they don't match, we will reduce the overlap of living IV
  //    and IV increment, that will potentially lead to better register
  //    allocation.
  if (auto IVStep = GetConstantStep(ScaleReg)) {
    // The following assert is important to ensure a lack of infinite loops.
    // This transform is (intentionally) the inverse of the one just above.
    // If they don't agree on the definition of an increment, we'd alternate
    // back and forth indefinitely.
    APInt Step = IVStep->second;
    if (Offset.isSignedIntN(64)) {
      TestAddrMode.InBounds = false;
      TestAddrMode.BaseOffs -= Offset.getLimitedValue();
      // If this addressing mode is legal, commit it.
      // (Note that we defer the (expensive) domtree base legality check
      // to the very last possible point.)
      if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
          getDTFn().dominates(IVInc, MemoryInst)) {
        AddrModeInsts.push_back(cast<Instruction>(IVInc));
  // Restore status quo.

  // Otherwise, just return what we have.

/// This is a little filter, which returns true if an addressing computation
/// involving I might be folded into a load/store accessing it.
/// This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return I->getType()->isIntOrPtrTy();
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
/// Check whether or not \p Val is a legal instruction for \p TLI.
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
  Instruction *PromotedInst = dyn_cast<Instruction>(Val);

  // If the ISDOpcode is undefined, it was undefined before the promotion.

  // Otherwise, check if the promoted instruction is legal or not.

/// Helper class to perform type promotion.
class TypePromotionHelper {
4662 /// Utility function to add a promoted instruction \p ExtOpnd to 4663 /// \p PromotedInsts and record the type of extension we have seen. 4664staticvoid addPromotedInst(InstrToOrigTy &PromotedInsts,
4666 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4667 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4668if (It != PromotedInsts.end()) {
4669// If the new extension is same as original, the information in 4670// PromotedInsts[ExtOpnd] is still correct. 4671if (It->second.getInt() == ExtTy)
4674// Now the new extension is different from old extension, we make 4675// the type information invalid by setting extension type to 4677 ExtTy = BothExtension;
4679 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->
getType(), ExtTy);
4682 /// Utility function to query the original type of instruction \p Opnd 4683 /// with a matched extension type. If the extension doesn't match, we 4684 /// cannot use the information we had on the original type. 4685 /// BothExtension doesn't match any extension type. 4686staticconstType *getOrigType(
const InstrToOrigTy &PromotedInsts,
4688 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4689 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4690if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4691return It->second.getPointer();
4695 /// Utility function to check whether or not a sign or zero extension 4696 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by 4697 /// either using the operands of \p Inst or promoting \p Inst. 4698 /// The type of the extension is defined by \p IsSExt. 4699 /// In other words, check if: 4700 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. 4701 /// #1 Promotion applies: 4702 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). 4703 /// #2 Operand reuses: 4704 /// ext opnd1 to ConsideredExtType. 4705 /// \p PromotedInsts maps the instructions to their type before promotion. 4706staticbool canGetThrough(
constInstruction *Inst,
Type *ConsideredExtType,
4707const InstrToOrigTy &PromotedInsts,
bool IsSExt);
4709 /// Utility function to determine if \p OpIdx should be promoted when 4710 /// promoting \p Inst. 4711staticbool shouldExtOperand(
constInstruction *Inst,
int OpIdx) {
4712return !(isa<SelectInst>(Inst) && OpIdx == 0);
4715 /// Utility function to promote the operand of \p Ext when this 4716 /// operand is a promotable trunc or sext or zext. 4717 /// \p PromotedInsts maps the instructions to their type before promotion. 4718 /// \p CreatedInstsCost[out] contains the cost of all instructions 4719 /// created to promote the operand of Ext. 4720 /// Newly added extensions are inserted in \p Exts. 4721 /// Newly added truncates are inserted in \p Truncs. 4722 /// Should never be called directly. 4723 /// \return The promoted value which is used instead of Ext. 4724staticValue *promoteOperandForTruncAndAnyExt(
4726 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4730 /// Utility function to promote the operand of \p Ext when this 4731 /// operand is promotable and is not a supported trunc or sext. 4732 /// \p PromotedInsts maps the instructions to their type before promotion. 4733 /// \p CreatedInstsCost[out] contains the cost of all the instructions 4734 /// created to promote the operand of Ext. 4735 /// Newly added extensions are inserted in \p Exts. 4736 /// Newly added truncates are inserted in \p Truncs. 4737 /// Should never be called directly. 4738 /// \return The promoted value which is used instead of Ext. 4740 TypePromotionTransaction &TPT,
4741 InstrToOrigTy &PromotedInsts,
4742unsigned &CreatedInstsCost,
4747 /// \see promoteOperandForOther. 4748staticValue *signExtendOperandForOther(
4750 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4753return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4754 Exts, Truncs, TLI,
true);
4757 /// \see promoteOperandForOther. 4758staticValue *zeroExtendOperandForOther(
4760 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4763return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4764 Exts, Truncs, TLI,
false);
4768 /// Type for the utility function that promotes the operand of Ext. 4770 InstrToOrigTy &PromotedInsts,
4771unsigned &CreatedInstsCost,
4776 /// Given a sign/zero extend instruction \p Ext, return the appropriate 4777 /// action to promote the operand of \p Ext instead of using Ext. 4778 /// \return NULL if no promotable action is possible with the current 4780 /// \p InsertedInsts keeps track of all the instructions inserted by the 4781 /// other CodeGenPrepare optimizations. This information is important 4782 /// because we do not want to promote these instructions as CodeGenPrepare 4783 /// will reinsert them later. Thus creating an infinite loop: create/remove. 4784 /// \p PromotedInsts maps the instructions to their type before promotion. 4785static Action getAction(
Instruction *Ext,
const SetOfInstrs &InsertedInsts,
4787const InstrToOrigTy &PromotedInsts);
4790}
} // end anonymous namespace

bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
                                        Type *ConsideredExtType,
                                        const InstrToOrigTy &PromotedInsts,
                                        bool IsSExt) {
  // The promotion helper does not know how to deal with vector types yet.
  // To be able to fix that, we would need to fix the places where we
  // statically extend, e.g., constants and such.

  // We can always get through zext.
  if (isa<ZExtInst>(Inst))
    return true;

  // sext(sext) is ok too.
  if (IsSExt && isa<SExtInst>(Inst))
    return true;

  // We can get through a binary operator, if it is legal. In other words, the
  // binary operator must have a nuw or nsw flag.
  if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
    if (isa<OverflowingBinaryOperator>(BinOp) &&
        ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
         (IsSExt && BinOp->hasNoSignedWrap())))
      return true;

  // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
  if ((Inst->getOpcode() == Instruction::And ||

  // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
  if (Inst->getOpcode() == Instruction::Xor) {
    // Make sure it is not a NOT.
    if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
      if (!Cst->getValue().isAllOnes())
        return true;
  }

  // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
  // It may change a poisoned value into a regular value, like
  //   zext i32 (shrl i8 %val, 12)  -->  shrl i32 (zext i8 %val), 12
  //   poisoned value                    regular value
  // It should be OK since undef covers valid value.
  if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
    return true;

  // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
  // It may change a poisoned value into a regular value, like
  //   zext i32 (shl i8 %val, 12)  -->  shl i32 (zext i8 %val), 12
  //   poisoned value                   regular value
  // It should be OK since undef covers valid value.
  const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
  if (ExtInst->hasOneUse()) {
    const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
    if (AndInst && AndInst->getOpcode() == Instruction::And) {
      const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));

  // Check if we can do the following simplification.
  // ext(trunc(opnd)) --> ext(opnd)
  if (!isa<TruncInst>(Inst))
    return false;

  // Check if we can use this operand in the extension.
  // If the type is larger than the result type of the extension, we cannot.

  // If the operand of the truncate is not an instruction, we will not have
  // any information on the dropped bits.
  // (Actually we could for constants but it is not worth the extra logic.)
  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);

  // Check if the source of the type is narrow enough.
  // I.e., check that trunc just drops extended bits of the same kind of
  // extension.
  // #1 get the type of the operand and check the kind of the extended bits.
  const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
  // #2 check that the truncate just drops extended bits.

TypePromotionHelper::Action TypePromotionHelper::getAction(
    Instruction *Ext, const SetOfInstrs &InsertedInsts,
    const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
         "Unexpected instruction type");
  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
  Type *ExtTy = Ext->getType();
  bool IsSExt = isa<SExtInst>(Ext);
  // If the operand of the extension is not an instruction, we cannot
  // get through it.
  // If it is, check that we can get through.
  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
    return nullptr;

  // Do not promote if the operand has been added by codegenprepare.
  // Otherwise, it means we are undoing an optimization that is likely to be
  // redone, thus causing a potential infinite loop.
  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
    return nullptr;

  // SExt or Trunc instructions.
  // Return the related handler.
  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
      isa<ZExtInst>(ExtOpnd))
    return promoteOperandForTruncAndAnyExt;

  // Regular instruction.
  // Abort early if we will have to insert non-free instructions.
  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
}
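// Illustrative example (not from the upstream sources): for a pattern such as
//
//   %t = trunc i32 %x to i8
//   %s = sext i8 %t to i64
//
// getAction selects promoteOperandForTruncAndAnyExt, because the operand of
// the extension is a trunc; for a promotable arithmetic operand such as
//
//   %a = add nsw i8 %x, %y
//   %s = sext i8 %a to i64
//
// it selects signExtendOperandForOther instead.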
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
    Instruction *SExt, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
  // By construction, the operand of SExt is an instruction. Otherwise we cannot
  // get through it and this method should not be called.
  Value *ExtVal = SExt;
  bool HasMergedNonFreeExt = false;
  if (isa<ZExtInst>(SExtOpnd)) {
    // Replace s|zext(zext(opnd))
    // => zext(opnd).
    HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
    TPT.eraseInstruction(SExt);
  } else {
    // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
    // => z|sext(opnd).
    TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
  }
  CreatedInstsCost = 0;

  // Remove dead code.
  if (SExtOpnd->use_empty())
    TPT.eraseInstruction(SExtOpnd);

  // Check if the extension is still needed.
  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
  CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;

  // At this point we have: ext ty opnd to ty.
  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
  TPT.eraseInstruction(ExtInst, NextVal);
  return NextVal;
}
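// Illustrative example (not from the upstream sources): this handler folds an
// extension into the extension/truncate it looks through. For instance,
//
//   %z1 = zext i8 %x to i16
//   %z2 = zext i16 %z1 to i64
//
// becomes, roughly, once the transaction is committed,
//
//   %z2 = zext i8 %x to i64
//
// with the now-dead intermediate extension erased through the transaction so
// the whole step can still be rolled back if it turns out not to pay off.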
Value *TypePromotionHelper::promoteOperandForOther(
    Instruction *Ext, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
    bool IsSExt) {
  // By construction, the operand of Ext is an instruction. Otherwise we cannot
  // get through it and this method should not be called.
  Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
  CreatedInstsCost = 0;
  if (!ExtOpnd->hasOneUse()) {
    // ExtOpnd will be promoted.
    // All its uses, but Ext, will need to use a truncated value of the
    // promoted version.
    // Create the truncate now.
    Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
    if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
      // Insert it just after the definition.
      ITrunc->moveAfter(ExtOpnd);
      if (Truncs)
        Truncs->push_back(ITrunc);
    }

    TPT.replaceAllUsesWith(ExtOpnd, Trunc);
    // Restore the operand of Ext (which has been replaced by the previous call
    // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
    TPT.setOperand(Ext, 0, ExtOpnd);
  }

  // Get through the Instruction:
  // 1. Update its type.
  // 2. Replace the uses of Ext by Inst.
  // 3. Extend each operand that needs to be extended.

  // Remember the original type of the instruction before promotion.
  // This is useful to know that the high bits are sign extended bits.
  addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
  TPT.mutateType(ExtOpnd, Ext->getType());
  TPT.replaceAllUsesWith(Ext, ExtOpnd);
  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
       ++OpIdx) {
    Value *Opnd = ExtOpnd->getOperand(OpIdx);
    if (Opnd->getType() == Ext->getType() ||
        !shouldExtOperand(ExtOpnd, OpIdx)) {
      continue;
    }
    // Check if we can statically extend the operand.
    if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
      unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
      APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
                            : Cst->getValue().zext(BitWidth);
      TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
      continue;
    }
    // UndefValue are typed, so we have to statically sign extend them.
    if (isa<UndefValue>(Opnd)) {
      TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
      continue;
    }
    // Otherwise we have to explicitly sign extend the operand.
    Value *ValForExtOpnd = IsSExt
                               ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
                               : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
    TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
    Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
    if (!InstForExtOpnd)
      continue;

    if (Exts)
      Exts->push_back(InstForExtOpnd);

    CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
  }
  TPT.eraseInstruction(Ext);
  return ExtOpnd;
}
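
// Illustrative example (hypothetical IR): promoting the operand of
//   %a = add nsw i32 %b, 1
//   %e = sext i32 %a to i64
// mutates the add to i64, extends its other operands, and removes %e:
//   %pb = sext i32 %b to i64
//   %a  = add nsw i64 %pb, 1
// Any remaining i32 uses of the original %a are rewritten to use a trunc of
// the promoted value.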
/// Check whether or not promoting an instruction to a wider type is
/// profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
/// plus the number of instructions that have been
/// matched in the addressing mode during the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
bool AddressingModeMatcher::isPromotionProfitable(
    unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
  LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
                    << '\n');
  // The cost of the new extensions is greater than the cost of the
  // old extension plus what we folded.
  // This is not profitable.
  if (NewCost > OldCost)
    return false;
  if (NewCost < OldCost)
    return true;
  // The promotion is neutral but it may help folding the sign extension in
  // loads for instance.
  // Check that we did not create an illegal instruction.
  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}

/// Given an instruction or constant expr, see if we can fold the operation
/// into the addressing mode. If so, update the addressing mode and return
/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
/// computation because it has been moved away.
/// Thus AddrInst must not be added in the matched instructions.
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth,
                                               bool *MovedAway) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5)
    return false;

  // By default, all matched instructions stay in place.
  if (MovedAway)
    *MovedAway = false;

  switch (Opcode) {
  case Instruction::PtrToInt:
5111// PtrToInt is always a noop, as we know that the int type is pointer sized. 5113case Instruction::IntToPtr: {
5116// This inttoptr is a no-op if the integer type is pointer sized. 5121case Instruction::BitCast:
5122// BitCast is always a noop, and we can handle it as long as it is 5123// int->int or pointer->pointer (we don't want int<->fp or something). 5125// Don't touch identity bitcasts. These were probably put here by LSR, 5126// and we don't want to mess around with them. Assume it knows what it 5131case Instruction::AddrSpaceCast: {
5139case Instruction::Add: {
5140// Check to see if we can merge in one operand, then the other. If so, we 5143unsigned OldSize = AddrModeInsts.
size();
5144// Start a transaction at this point. 5145// The LHS may match but not the RHS. 5146// Therefore, we need a higher level restoration point to undo partially 5147// matched operation. 5148 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5149 TPT.getRestorationPoint();
    // Try to match an integer constant second to increase its chance of ending
    // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
    int First = 0, Second = 1;
    if (isa<ConstantInt>(AddrInst->getOperand(First)) &&
        !isa<ConstantInt>(AddrInst->getOperand(Second)))
      std::swap(First, Second);

    if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
        matchAddr(AddrInst->getOperand(Second), Depth + 1))
      return true;
5162// Restore the old addr mode info. 5164 AddrModeInsts.
resize(OldSize);
5165 TPT.rollback(LastKnownGood);
5167// Otherwise this was over-aggressive. Try merging operands in the opposite 5173// Otherwise we definitely can't merge the ADD in. 5175 AddrModeInsts.
resize(OldSize);
5176 TPT.rollback(LastKnownGood);
  // case Instruction::Or:
  //  TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS || RHS->getBitWidth() > 64)
      return false;
    int64_t Scale = Opcode == Instruction::Shl
                        ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
                        : RHS->getSExtValue();

    return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
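
  // Illustrative example (hypothetical IR): in
  //   %idx4 = shl i64 %idx, 2
  //   %p    = getelementptr i8, ptr %base, i64 %idx4
  // the shift is matched as Scale = 4 with ScaledReg = %idx, which targets
  // with [reg + reg*scale] addressing can fold into the memory operand.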
  case Instruction::GetElementPtr: {
    // Scan the GEP. We check whether it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5207 cast<ConstantInt>(AddrInst->
getOperand(i))->getZExtValue();
5212// The optimisations below currently only work for fixed offsets. 5217 dyn_cast<ConstantInt>(AddrInst->
getOperand(i))) {
5224// We only allow one variable index at the moment. 5225if (VariableOperand != -1)
5228// Remember the variable index. 5229 VariableOperand = i;
5235// A common case is for the GEP to only do a constant offset. In this case, 5236// just add it to the disp field and check validity. 5237if (VariableOperand == -1) {
5238AddrMode.BaseOffs += ConstantOffset;
5240if (!cast<GEPOperator>(AddrInst)->isInBounds())
5244AddrMode.BaseOffs -= ConstantOffset;
5248 ConstantOffset > 0) {
5249// Record GEPs with non-zero offsets as candidates for splitting in 5250// the event that the offset cannot fit into the r+i addressing mode. 5251// Simple and common case that only one GEP is used in calculating the 5252// address for the memory access. 5254auto *BaseI = dyn_cast<Instruction>(
Base);
5255auto *
GEP = cast<GetElementPtrInst>(AddrInst);
5256if (isa<Argument>(
Base) || isa<GlobalValue>(
Base) ||
5257 (BaseI && !isa<CastInst>(BaseI) &&
5258 !isa<GetElementPtrInst>(BaseI))) {
5259// Make sure the parent block allows inserting non-PHI instructions 5260// before the terminator. 5262 : &
GEP->getFunction()->getEntryBlock();
5264 LargeOffsetGEP = std::make_pair(
GEP, ConstantOffset);
5271// Save the valid addressing mode in case we can't match. 5273unsigned OldSize = AddrModeInsts.
size();
5275// See if the scale and offset amount is valid for this target. 5276AddrMode.BaseOffs += ConstantOffset;
5277if (!cast<GEPOperator>(AddrInst)->isInBounds())
5280// Match the base operand of the GEP. 5282// If it couldn't be matched, just stuff the value in a register. 5285 AddrModeInsts.
resize(OldSize);
5292// Match the remaining variable portion of the GEP. 5293if (!matchScaledValue(AddrInst->
getOperand(VariableOperand), VariableScale,
5295// If it couldn't be matched, try stuffing the base into a register 5296// instead of matching it, and retrying the match of the scale. 5298 AddrModeInsts.
resize(OldSize);
5303AddrMode.BaseOffs += ConstantOffset;
5304if (!matchScaledValue(AddrInst->
getOperand(VariableOperand),
5305 VariableScale,
Depth)) {
5306// If even that didn't work, bail. 5308 AddrModeInsts.
resize(OldSize);
5315case Instruction::SExt:
5316case Instruction::ZExt: {
5321// Try to move this ext out of the way of the addressing mode. 5322// Ask for a method for doing so. 5323 TypePromotionHelper::Action TPH =
5324 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5328 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5329 TPT.getRestorationPoint();
5330unsigned CreatedInstsCost = 0;
5332Value *PromotedOperand =
5333 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost,
nullptr,
nullptr, TLI);
5334// SExt has been moved away. 5335// Thus either it will be rematched later in the recursive calls or it is 5336// gone. Anyway, we must not fold it into the addressing mode at this point. 5340// addr = gep base, idx 5342// promotedOpnd = ext opnd <- no match here 5343// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 5344// addr = gep base, op <- match 5349"TypePromotionHelper should have filtered out those cases");
5352unsigned OldSize = AddrModeInsts.
size();
5354if (!matchAddr(PromotedOperand,
Depth) ||
5355// The total of the new cost is equal to the cost of the created 5357// The total of the old cost is equal to the cost of the extension plus 5358// what we have saved in the addressing mode. 5359 !isPromotionProfitable(CreatedInstsCost,
5360 ExtCost + (AddrModeInsts.
size() - OldSize),
5363 AddrModeInsts.
resize(OldSize);
5364LLVM_DEBUG(
dbgs() <<
"Sign extension does not pay off: rollback\n");
5365 TPT.rollback(LastKnownGood);
5370case Instruction::Call:
5372if (
II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5383/// If we can, try to add the value of 'Addr' into the current addressing mode. 5384/// If Addr can't be added to AddrMode this returns false and leaves AddrMode 5385/// unmodified. This assumes that Addr is either a pointer type or intptr_t 5388bool AddressingModeMatcher::matchAddr(
Value *
Addr,
unsignedDepth) {
5389// Start a transaction at this point that we will rollback if the matching 5391 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5392 TPT.getRestorationPoint();
5395// Fold in immediates if legal for the target. 5402// If this is a global variable, try to fold it into the addressing mode. 5411unsigned OldSize = AddrModeInsts.
size();
5413// Check to see if it is possible to fold this operation. 5414bool MovedAway =
false;
5415if (matchOperationAddr(
I,
I->getOpcode(),
Depth, &MovedAway)) {
5416// This instruction may have been moved away. If so, there is nothing 5420// Okay, it's possible to fold this. Check to see if it is actually 5421// *profitable* to do so. We use a simple cost model to avoid increasing 5422// register pressure too much. 5423if (
I->hasOneUse() ||
5424 isProfitableToFoldIntoAddressingMode(
I, BackupAddrMode,
AddrMode)) {
5429// It isn't profitable to do this, roll back. 5431 AddrModeInsts.
resize(OldSize);
5432 TPT.rollback(LastKnownGood);
5435if (matchOperationAddr(CE,
CE->getOpcode(),
Depth))
5437 TPT.rollback(LastKnownGood);
5438 }
  else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = nullptr;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = nullptr;
  }

  // Couldn't match.
  TPT.rollback(LastKnownGood);
  return false;
}
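
// Illustrative example (hypothetical IR): for an address computed as
//   %p = getelementptr i8, ptr %base, i64 16
// matchAddr records BaseReg = %base and BaseOffs = 16 when the target accepts
// a [reg + imm] form; if it does not, the matcher rolls back and falls back to
// plain [reg] addressing on %p itself.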
5468/// Check to see if all uses of OpVal by the specified inline asm call are due 5469/// to memory operands. If so, return true, otherwise return false. 5478// Compute the constraint code and ConstraintType to use. 5481// If this asm operand is our Value*, and if it isn't an indirect memory 5482// operand, we can't fold it! TODO: Also handle C_Address? 5483if (OpInfo.CallOperandVal == OpVal &&
5485 !OpInfo.isIndirect))
5492/// Recursively walk all the uses of I until we find a memory use. 5493/// If we find an obviously non-foldable instruction, return true. 5494/// Add accessed addresses and types to MemoryUses. 5500// If we already considered this instruction, we're done. 5501if (!ConsideredInsts.
insert(
I).second)
5504// If this is an obviously unfoldable instruction, bail out. 5508// Loop over all the uses, recursively processing them. 5509for (
Use &U :
I->uses()) {
5510// Conservatively return true if we're seeing a large number or a deep chain 5511// of users. This avoids excessive compilation times in pathological cases. 5515Instruction *UserI = cast<Instruction>(U.getUser());
5516if (
LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5517 MemoryUses.push_back({&U, LI->getType()});
5521if (
StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5523returntrue;
// Storing addr, not into addr. 5524 MemoryUses.push_back({&U, SI->getValueOperand()->
getType()});
5530returntrue;
// Storing addr, not into addr. 5531 MemoryUses.push_back({&U, RMW->getValOperand()->
getType()});
5537returntrue;
// Storing addr, not into addr. 5538 MemoryUses.push_back({&U, CmpX->getCompareOperand()->
getType()});
5542if (
CallInst *CI = dyn_cast<CallInst>(UserI)) {
5543if (CI->hasFnAttr(Attribute::Cold)) {
5544// If this is a cold call, we can sink the addressing calculation into 5545// the cold path. See optimizeCallInst 5550InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5554// If this is a memory operand, we're cool, otherwise bail out. 5561 PSI, BFI, SeenInsts))
5572unsigned SeenInsts = 0;
5575 PSI, BFI, SeenInsts);
5579/// Return true if Val is already known to be live at the use site that we're 5580/// folding it into. If so, there is no cost to include it in the addressing 5581/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the 5582/// instruction already. 5583bool AddressingModeMatcher::valueAlreadyLiveAtInst(
Value *Val,
5586// If Val is either of the known-live values, we know it is live! 5587if (Val ==
nullptr || Val == KnownLive1 || Val == KnownLive2)
5590// All values other than instructions and arguments (e.g. constants) are live. 5591if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5594// If Val is a constant sized alloca in the entry block, it is live, this is 5595// true because it is just a reference to the stack/frame pointer, which is 5596// live for the whole function. 5597if (
AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5601// Check to see if this value is already used in the memory instruction's 5602// block. If so, it's already live into the block at the very least, so we 5603// can reasonably fold it. 5607/// It is possible for the addressing mode of the machine to fold the specified 5608/// instruction into a load or store that ultimately uses it. 5609/// However, the specified instruction has multiple uses. 5610/// Given this, it may actually increase register pressure to fold it 5611/// into the load. For example, consider this code: 5615/// use(Y) -> nonload/store 5619/// In this case, Y has multiple uses, and can be folded into the load of Z 5620/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 5621/// be live at the use(Y) line. If we don't fold Y into load Z, we use one 5622/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 5623/// number of computations either. 5625/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 5626/// X was live across 'load Z' for other reasons, we actually *would* want to 5627/// fold the addressing mode in the Z case. This would make Y die earlier. 5628bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5630if (IgnoreProfitability)
5633// AMBefore is the addressing mode before this instruction was folded into it, 5634// and AMAfter is the addressing mode after the instruction was folded. Get 5635// the set of registers referenced by AMAfter and subtract out those 5636// referenced by AMBefore: this is the set of values which folding in this 5637// address extends the lifetime of. 5639// Note that there are only two potential values being referenced here, 5640// BaseReg and ScaleReg (global addresses are always available, as are any 5641// folded immediates). 5644// If the BaseReg or ScaledReg was referenced by the previous addrmode, their 5645// lifetime wasn't extended by adding this instruction. 5648if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.
BaseReg, AMBefore.
ScaledReg))
  // If folding this instruction (and its subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (!BaseReg && !ScaledReg)
    return true;
  // If all uses of this instruction can have the address mode sunk into them,
  // we can remove the addressing mode and effectively trade one live register
  // for another (at worst.) In this context, folding an addressing mode into
  // the use is just a particularly nice way of sinking it.
    return false; // Has a non-memory, non-foldable use!
const std::pair<Use *, Type *> &Pair : MemoryUses) {
5676Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5677Type *AddressAccessTy = Pair.second;
5678unsigned AS =
Address->getType()->getPointerAddressSpace();
5680// Do a match against the root of this address, ignoring profitability. This 5681// will tell us if the addressing mode for the memory operation will 5682// *actually* cover the shared instruction. 5684 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(
nullptr,
5686 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5687 TPT.getRestorationPoint();
5688 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI,
TRI, LI, getDTFn,
5689 AddressAccessTy, AS, UserI, Result,
5690 InsertedInsts, PromotedInsts, TPT,
5691 LargeOffsetGEP, OptSize, PSI, BFI);
5692 Matcher.IgnoreProfitability =
true;
5693boolSuccess = Matcher.matchAddr(Address, 0);
5697// The match was to check the profitability, the changes made are not 5698// part of the original matcher. Therefore, they should be dropped 5699// otherwise the original matcher will not present the right state. 5700 TPT.rollback(LastKnownGood);
5702// If the match didn't cover I, then it won't be shared by it. 5706 MatchedAddrModeInsts.
clear();
5712/// Return true if the specified values are defined in a 5713/// different basic block than BB. 5716returnI->getParent() != BB;
5720/// Sink addressing mode computation immediate before MemoryInst if doing so 5721/// can be done without increasing register pressure. The need for the 5722/// register pressure constraint means this can end up being an all or nothing 5723/// decision for all uses of the same addressing computation. 5725/// Load and Store Instructions often have addressing modes that can do 5726/// significant amounts of computation. As such, instruction selection will try 5727/// to get the load or store to do as much computation as possible for the 5728/// program. The problem is that isel can only see within a single block. As 5729/// such, we sink as much legal addressing mode work into the block as possible. 5731/// This method is used to optimize both load/store and inline asms with memory 5732/// operands. It's also used to sink addressing computations feeding into cold 5733/// call sites into their (cold) basic block. 5735/// The motivation for handling sinking into cold blocks is that doing so can 5736/// both enable other address mode sinking (by satisfying the register pressure 5737/// constraint above), and reduce register pressure globally (by removing the 5738/// addressing mode computation from the fast path entirely.). 5740Type *AccessTy,
unsigned AddrSpace) {
5743// Try to collapse single-value PHI nodes. This is necessary to undo 5744// unprofitable PRE transformations. 5749// Use a worklist to iteratively look through PHI and select nodes, and 5750// ensure that the addressing mode obtained from the non-PHI/select roots of 5751// the graph are compatible. 5752bool PhiOrSelectSeen =
false;
5755 AddressingModeCombiner AddrModes(SQ,
Addr);
5756 TypePromotionTransaction TPT(RemovedInsts);
5757 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5758 TPT.getRestorationPoint();
5759while (!worklist.
empty()) {
    // We allow traversing cyclic Phi nodes.
    // In case of success after this loop we ensure that traversing through
    // Phi nodes ends up with all cases to compute address of the form
    //    BaseGV + Base + Scale * Index + Offset
    // where Scale and Offset are constants and BaseGV, Base and Index
    // are exactly the same Values in all cases.
    // It means that BaseGV, Scale and Offset dominate our memory instruction
    // and have the same value as they had in address computation represented
    // as Phi. So we can safely sink address computation to memory instruction.
    if (!Visited.insert(V).second)
      continue;
5774// For a PHI node, push all of its incoming values. 5775if (
PHINode *
P = dyn_cast<PHINode>(V)) {
5777 PhiOrSelectSeen =
true;
5780// Similar for select. 5781if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
5784 PhiOrSelectSeen =
true;
5788// For non-PHIs, determine the addressing mode being computed. Note that 5789// the result may differ depending on what other uses our candidate 5790// addressing instructions might have. 5791 AddrModeInsts.
clear();
5792 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(
nullptr,
5794// Defer the query (and possible computation of) the dom tree to point of 5795// actual use. It's expected that most address matches don't actually need 5799return this->getDT(*
F);
5801ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5802 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5803 *
TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5808// If splitting the underlying data structure can reduce the offset of a 5809// GEP, collect the GEP. Skip the GEPs that are the new bases of 5810// previously split data structures. 5811 LargeOffsetGEPMap[
GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5812 LargeOffsetGEPID.
insert(std::make_pair(
GEP, LargeOffsetGEPID.
size()));
5815 NewAddrMode.OriginalValue =
V;
5816if (!AddrModes.addNewAddrMode(NewAddrMode))
5820// Try to combine the AddrModes we've collected. If we couldn't collect any, 5821// or we have multiple but either couldn't combine them or combining them 5822// wouldn't do anything useful, bail out now. 5823if (!AddrModes.combineAddrModes()) {
5824 TPT.rollback(LastKnownGood);
5829// Get the combined AddrMode (or the only AddrMode, if we only had one). 5832// If all the instructions matched are already in this BB, don't do anything. 5833// If we saw a Phi node then it is not local definitely, and if we saw a 5834// select then we want to push the address calculation past it even if it's 5835// already in this BB. 5836if (!PhiOrSelectSeen &&
none_of(AddrModeInsts, [&](
Value *V) {
5844// Insert this computation right after this user. Since our caller is 5845// scanning from the top of the BB to the bottom, reuse of the expr are 5846// guaranteed to happen later. 5849// Now that we determined the addressing expression we want to use and know 5850// that we have to sink it into this block. Check to see if we have already 5851// done this for some other load/store instr in this block. If so, reuse 5852// the computation. Before attempting reuse, check if the address is valid 5853// as it may have been erased. 5858Type *IntPtrTy =
DL->getIntPtrType(
Addr->getType());
5861 <<
" for " << *MemoryInst <<
"\n");
5864Addr->getType()->getPointerAddressSpace() &&
5865 !
DL->isNonIntegralPointerType(
Addr->getType())) {
5866// There are two reasons the address spaces might not match: a no-op 5867// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a 5868// ptrtoint/inttoptr pair to ensure we match the original semantics. 5869// TODO: allow bitcast between different address space pointers with the 5871 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy,
"sunkaddr");
5873 Builder.CreateIntToPtr(SunkAddr,
Addr->getType(),
"sunkaddr");
5875 SunkAddr = Builder.CreatePointerCast(SunkAddr,
Addr->getType());
5879// By default, we use the GEP-based method when AA is used later. This 5880// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 5882 <<
" for " << *MemoryInst <<
"\n");
5883Value *ResultPtr =
nullptr, *ResultIndex =
nullptr;
5885// First, find the pointer. 5892// We can't add more than one pointer together, nor can we scale a 5893// pointer (both of which seem meaningless). 5894if (ResultPtr ||
AddrMode.Scale != 1)
5901// It is only safe to sign extend the BaseReg if we know that the math 5902// required to create it did not overflow before we extend it. Since 5903// the original IR value was tossed in favor of a constant back when 5904// the AddrMode was created we need to bail out gracefully if widths 5905// do not match instead of extending it. 5907// (See below for code to add the scale.) 5916if (BaseGV !=
nullptr) {
5921 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5927// If the real base value actually came from an inttoptr, then the matcher 5928// will look through it and provide only the integer value. In that case, 5930if (!
DL->isNonIntegralPointerType(
Addr->getType())) {
5931if (!ResultPtr &&
AddrMode.BaseReg) {
5932 ResultPtr = Builder.CreateIntToPtr(
AddrMode.BaseReg,
Addr->getType(),
5935 }
elseif (!ResultPtr &&
AddrMode.Scale == 1) {
5936 ResultPtr = Builder.CreateIntToPtr(
AddrMode.ScaledReg,
Addr->getType(),
5945 }
elseif (!ResultPtr) {
5949 Builder.getPtrTy(
Addr->getType()->getPointerAddressSpace());
5951// Start with the base register. Do this first so that subsequent address 5952// matching finds it last, which will prevent it from trying to match it 5953// as the scaled value in case it happens to be a mul. That would be 5954// problematic if we've sunk a different mul for the scale, because then 5955// we'd end up sinking both muls. 5958if (
V->getType() != IntPtrTy)
5959V = Builder.CreateIntCast(V, IntPtrTy,
/*isSigned=*/true,
"sunkaddr");
5964// Add the scale value. 5967if (
V->getType() == IntPtrTy) {
5971 cast<IntegerType>(
V->getType())->getBitWidth() &&
5972"We can't transform if ScaledReg is too narrow");
5973V = Builder.CreateTrunc(V, IntPtrTy,
"sunkaddr");
5977V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
5980 ResultIndex = Builder.CreateAdd(ResultIndex, V,
"sunkaddr");
5985// Add in the Base Offset if present. 5989// We need to add this separately from the scale above to help with 5990// SDAG consecutive load/store merging. 5991if (ResultPtr->
getType() != I8PtrTy)
5992 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5993 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex,
"sunkaddr",
6001 SunkAddr = ResultPtr;
6003if (ResultPtr->
getType() != I8PtrTy)
6004 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6005 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex,
"sunkaddr",
6011Addr->getType()->getPointerAddressSpace() &&
6012 !
DL->isNonIntegralPointerType(
Addr->getType())) {
6013// There are two reasons the address spaces might not match: a no-op 6014// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a 6015// ptrtoint/inttoptr pair to ensure we match the original semantics. 6016// TODO: allow bitcast between different address space pointers with 6018 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy,
"sunkaddr");
6020 Builder.CreateIntToPtr(SunkAddr,
Addr->getType(),
"sunkaddr");
6022 SunkAddr = Builder.CreatePointerCast(SunkAddr,
Addr->getType());
6026// We'd require a ptrtoint/inttoptr down the line, which we can't do for 6027// non-integral pointers, so in that case bail out now. 6031PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6032if (
DL->isNonIntegralPointerType(
Addr->getType()) ||
6033 (BasePtrTy &&
DL->isNonIntegralPointerType(BasePtrTy)) ||
6034 (ScalePtrTy &&
DL->isNonIntegralPointerType(ScalePtrTy)) ||
6036DL->isNonIntegralPointerType(
AddrMode.BaseGV->getType())))
6040 <<
" for " << *MemoryInst <<
"\n");
6041Type *IntPtrTy =
DL->getIntPtrType(
Addr->getType());
6044// Start with the base register. Do this first so that subsequent address 6045// matching finds it last, which will prevent it from trying to match it 6046// as the scaled value in case it happens to be a mul. That would be 6047// problematic if we've sunk a different mul for the scale, because then 6048// we'd end up sinking both muls. 6051if (
V->getType()->isPointerTy())
6052V = Builder.CreatePtrToInt(V, IntPtrTy,
"sunkaddr");
6053if (
V->getType() != IntPtrTy)
6054V = Builder.CreateIntCast(V, IntPtrTy,
/*isSigned=*/true,
"sunkaddr");
6058// Add the scale value. 6061if (
V->getType() == IntPtrTy) {
6063 }
elseif (
V->getType()->isPointerTy()) {
6064V = Builder.CreatePtrToInt(V, IntPtrTy,
"sunkaddr");
6065 }
elseif (cast<IntegerType>(IntPtrTy)->
getBitWidth() <
6066 cast<IntegerType>(
V->getType())->getBitWidth()) {
6067V = Builder.CreateTrunc(V, IntPtrTy,
"sunkaddr");
6069// It is only safe to sign extend the BaseReg if we know that the math 6070// required to create it did not overflow before we extend it. Since 6071// the original IR value was tossed in favor of a constant back when 6072// the AddrMode was created we need to bail out gracefully if widths 6073// do not match instead of extending it. 6076I->eraseFromParent();
6080V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
6083Result = Builder.CreateAdd(Result, V,
"sunkaddr");
6088// Add in the BaseGV if present. 6090if (BaseGV !=
nullptr) {
6093 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6097Value *
V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy,
"sunkaddr");
6099Result = Builder.CreateAdd(Result, V,
"sunkaddr");
6104// Add in the Base Offset if present. 6108Result = Builder.CreateAdd(Result, V,
"sunkaddr");
6116 SunkAddr = Builder.CreateIntToPtr(Result,
Addr->getType(),
"sunkaddr");
6120// Store the newly computed address into the cache. In the case we reused a 6121// value, this should be idempotent. 6124// If we have no uses, recursively delete the value and all dead instructions 6127 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6128 RecursivelyDeleteTriviallyDeadInstructions(
6129 Repl, TLInfo, nullptr,
6130 [&](Value *V) { removeAllAssertingVHReferences(V); });
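
// Illustrative example (hypothetical IR): if %addr is defined in another block
// as
//   %addr = getelementptr i8, ptr %base, i64 40
// and is only used by a load in this block, an equivalent "sunkaddr"
// computation is materialized right before the load so SelectionDAG can fold
// it into the memory operand:
//   %sunkaddr = getelementptr i8, ptr %base, i64 40
//   %v = load i32, ptr %sunkaddr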
6137/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find 6138/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can 6139/// only handle a 2 operand GEP in the same basic block or a splat constant 6140/// vector. The 2 operands to the GEP must have a scalar pointer and a vector 6143/// If the existing GEP has a vector base pointer that is splat, we can look 6144/// through the splat to find the scalar pointer. If we can't find a scalar 6145/// pointer there's nothing we can do. 6147/// If we have a GEP with more than 2 indices where the middle indices are all 6148/// zeroes, we can replace it with 2 GEPs where the second has 2 operands. 6150/// If the final index isn't a vector or is a splat, we can emit a scalar GEP 6151/// followed by a GEP with an all zeroes vector index. This will enable 6152/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a 6154bool CodeGenPrepare::optimizeGatherScatterInst(
Instruction *MemoryInst,
6158if (
constauto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr)) {
6159// Don't optimize GEPs that don't have indices. 6160if (!
GEP->hasIndices())
6163// If the GEP and the gather/scatter aren't in the same BB, don't optimize. 6164// FIXME: We should support this by sinking the GEP. 6170bool RewriteGEP =
false;
6172if (Ops[0]->
getType()->isVectorTy()) {
6179unsigned FinalIndex = Ops.size() - 1;
6181// Ensure all but the last index is 0. 6182// FIXME: This isn't strictly required. All that's required is that they are 6183// all scalars or splats. 6184for (
unsigned i = 1; i < FinalIndex; ++i) {
6185auto *
C = dyn_cast<Constant>(Ops[i]);
6188if (isa<VectorType>(
C->getType()))
6189C =
C->getSplatValue();
6190auto *CI = dyn_cast_or_null<ConstantInt>(
C);
6193// Scalarize the index if needed. 6197// Try to scalarize the final index. 6198if (Ops[FinalIndex]->
getType()->isVectorTy()) {
6200auto *
C = dyn_cast<ConstantInt>(V);
6201// Don't scalarize all zeros vector. 6202if (!
C || !
C->isZero()) {
    // If we made any changes or we have extra operands, we need to generate
    // new instructions.
    if (!RewriteGEP && Ops.size() == 2)
      return false;
6214auto NumElts = cast<VectorType>(
Ptr->getType())->getElementCount();
6218Type *SourceTy =
GEP->getSourceElementType();
6219Type *ScalarIndexTy =
DL->getIndexType(Ops[0]->
getType()->getScalarType());
6221// If the final index isn't a vector, emit a scalar GEP containing all ops 6222// and a vector GEP with all zeroes final index. 6223if (!Ops[FinalIndex]->
getType()->isVectorTy()) {
6224 NewAddr = Builder.CreateGEP(SourceTy, Ops[0],
ArrayRef(Ops).drop_front());
6225auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6227 SourceTy,
ArrayRef(Ops).drop_front());
6234// Create a scalar GEP if there are more than 2 operands. 6235if (Ops.size() != 2) {
6236// Replace the last index with 0. 6241 SourceTy,
ArrayRef(Ops).drop_front());
6244// Now create the GEP with scalar pointer and vector index. 6245 NewAddr = Builder.CreateGEP(SourceTy,
Base, Index);
  } else if (!isa<Constant>(Ptr)) {
    // Not a GEP, maybe it's a splat and we can create a GEP to enable
    // SelectionDAGBuilder to use it as a uniform base.
    Value *V = getSplatValue(Ptr);
    if (!V)
      return false;

    auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6258// Emit a vector GEP with a scalar pointer and all 0s vector index. 6259Type *ScalarIndexTy =
DL->getIndexType(
V->getType()->getScalarType());
6260auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6263 Intrinsic::masked_gather) {
6267 Intrinsic::masked_scatter);
  } else {
    // Constant, SelectionDAGBuilder knows to check if it's a splat.
    return false;
  }

  MemoryInst->replaceUsesOfWith(Ptr, NewAddr);

  // If we have no uses, recursively delete the value and all dead instructions
  // using it.
  if (Ptr->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(
        Ptr, TLInfo, nullptr,
        [&](Value *V) { removeAllAssertingVHReferences(V); });

  return true;
}
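
// Illustrative example (hypothetical IR): a masked gather of i32 whose pointer
// operand is a splat of a single scalar pointer %p is rewritten to use a
// scalar base with an all-zero vector index,
//   %ptrs = getelementptr i32, ptr %p, <4 x i64> zeroinitializer
// which SelectionDAGBuilder can recognize as a uniform base for ISD::MGATHER.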
6288/// If there are any memory operands, use OptimizeMemoryInst to sink their 6289/// address computing into the block when possible / profitable. 6290bool CodeGenPrepare::optimizeInlineAsmInst(
CallInst *CS) {
6291bool MadeChange =
false;
6299// Compute the constraint code and ConstraintType to use. 6302// TODO: Also handle C_Address? 6304 OpInfo.isIndirect) {
6306 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->
getType(), ~0u);
6314/// Check if all the uses of \p Val are equivalent (or free) zero or 6319bool IsSExt = isa<SExtInst>(FirstUser);
6323if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6326// Same input and output types: Same instruction after CSE. 6330// If IsSExt is true, we are in this situation: 6332// b = sext ty1 a to ty2 6333// c = sext ty1 a to ty3 6334// Assuming ty2 is shorter than ty3, this could be turned into: 6336// b = sext ty1 a to ty2 6337// c = sext ty2 b to ty3 6338// However, the last sext is not free. 6342// This is a ZExt, maybe this is free to extend from one type to another. 6343// In that case, we would not account for a different use. 6358// All uses are the same or can be derived from one another for free. 6362/// Try to speculatively promote extensions in \p Exts and continue 6363/// promoting through newly promoted operands recursively as far as doing so is 6364/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. 6365/// When some promotion happened, \p TPT contains the proper state to revert 6368/// \return true if some promotion happened, false otherwise. 6369bool CodeGenPrepare::tryToPromoteExts(
6372unsigned CreatedInstsCost) {
6373bool Promoted =
false;
6375// Iterate over all the extensions to try to promote them. 6376for (
auto *
I : Exts) {
6377// Early check if we directly have ext(load). 6378if (isa<LoadInst>(
I->getOperand(0))) {
6383// Check whether or not we want to do any promotion. The reason we have 6384// this check inside the for loop is to catch the case where an extension 6385// is directly fed by a load because in such case the extension can be moved 6386// up without any promotion on its operands. 6390// Get the action to perform the promotion. 6391 TypePromotionHelper::Action TPH =
6392 TypePromotionHelper::getAction(
I, InsertedInsts, *TLI, PromotedInsts);
6393// Check if we can promote. 6395// Save the current extension as we cannot move up through its operand. 6400// Save the current state. 6401 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6402 TPT.getRestorationPoint();
6404unsigned NewCreatedInstsCost = 0;
6407Value *PromotedVal = TPH(
I, TPT, PromotedInsts, NewCreatedInstsCost,
6408 &NewExts,
nullptr, *TLI);
6410"TypePromotionHelper should have filtered out those cases");
6412// We would be able to merge only one extension in a load. 6413// Therefore, if we have more than 1 new extension we heuristically 6414// cut this search path, because it means we degrade the code quality. 6415// With exactly 2, the transformation is neutral, because we will merge 6416// one extension but leave one. However, we optimistically keep going, 6417// because the new extension may be removed too. Also avoid replacing a 6418// single free extension with multiple extensions, as this increases the 6419// number of IR instructions while not providing any savings. 6420longlong TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6421// FIXME: It would be possible to propagate a negative value instead of 6422// conservatively ceiling it to 0. 6423 TotalCreatedInstsCost =
6424 std::max((
longlong)0, (TotalCreatedInstsCost - ExtCost));
6426 (TotalCreatedInstsCost > 1 ||
6428 (ExtCost == 0 && NewExts.
size() > 1))) {
6429// This promotion is not profitable, rollback to the previous state, and 6430// save the current extension in ProfitablyMovedExts as the latest 6431// speculative promotion turned out to be unprofitable. 6432 TPT.rollback(LastKnownGood);
6436// Continue promoting NewExts as far as doing so is profitable. 6438 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6439bool NewPromoted =
false;
6440for (
auto *ExtInst : NewlyMovedExts) {
6441Instruction *MovedExt = cast<Instruction>(ExtInst);
6443// If we have reached to a load, we need this extra profitability check 6444// as it could potentially be merged into an ext(load). 6445if (isa<LoadInst>(ExtOperand) &&
6450 ProfitablyMovedExts.
push_back(MovedExt);
6454// If none of speculative promotions for NewExts is profitable, rollback 6455// and save the current extension (I) as the last profitable extension. 6457 TPT.rollback(LastKnownGood);
6461// The promotion is profitable. 6467/// Merging redundant sexts when one is dominating the other. 6468bool CodeGenPrepare::mergeSExts(
Function &
F) {
6470for (
auto &Entry : ValToSExtendedUses) {
6471 SExts &Insts =
Entry.second;
6474if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6477bool inserted =
false;
6478for (
auto &Pt : CurPts) {
6481 RemovedInsts.insert(Pt);
6482 Pt->removeFromParent();
6489// Give up if we need to merge in a common dominator as the 6490// experiments show it is not profitable. 6493 RemovedInsts.insert(Inst);
6500 CurPts.push_back(Inst);
// Splitting large data structures so that the GEPs accessing them can have
// smaller offsets so that they can be sunk to the same blocks as their users.
// For example, a large struct starting from %base is split into two parts
// where the second part starts from %new_base.
//
// Before:
// BB0:
//   %base     =
//
// BB1:
//   %gep0     = gep %base, off0
//   %gep1     = gep %base, off1
//   %gep2     = gep %base, off2
//
// BB2:
//   %load1    = load %gep0
//   %load2    = load %gep1
//   %load3    = load %gep2
//
// After:
// BB0:
//   %base     =
//   %new_base = gep %base, off0
//
// BB1:
//   %new_gep0 = %new_base
//   %new_gep1 = gep %new_base, off1 - off0
//   %new_gep2 = gep %new_base, off2 - off0
//
// BB2:
//   %load1    = load i32, i32* %new_gep0
//   %load2    = load i32, i32* %new_gep1
//   %load3    = load i32, i32* %new_gep2
//
// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
// their offsets are small enough to fit into the addressing mode.
bool CodeGenPrepare::splitLargeGEPOffsets() {
  bool Changed = false;
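  // Illustrative example (hypothetical target): assume the load/store
  // immediate offset field only covers [0, 4095]. GEPs at offsets 0 and 8192
  // off the same base cannot both fold their offsets, but after rebasing the
  // large-offset group on %new_base = gep %base, 8192 each access is again
  // within reach of the immediate form.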
6544for (
auto &Entry : LargeOffsetGEPMap) {
6547 &LargeOffsetGEPs =
Entry.second;
6548auto compareGEPOffset =
6549 [&](
const std::pair<GetElementPtrInst *, int64_t> &
LHS,
6550const std::pair<GetElementPtrInst *, int64_t> &
RHS) {
6551if (
LHS.first ==
RHS.first)
6553if (
LHS.second !=
RHS.second)
6554returnLHS.second <
RHS.second;
6555return LargeOffsetGEPID[
LHS.first] < LargeOffsetGEPID[
RHS.first];
6557// Sorting all the GEPs of the same data structures based on the offsets. 6558llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6560// Skip if all the GEPs have the same offsets. 6561if (LargeOffsetGEPs.
front().second == LargeOffsetGEPs.
back().second)
6564 int64_t BaseOffset = LargeOffsetGEPs.
begin()->second;
6565Value *NewBaseGEP =
nullptr;
6567auto createNewBase = [&](int64_t BaseOffset,
Value *OldBase,
6570Type *PtrIdxTy =
DL->getIndexType(
GEP->getType());
6572 PointerType::get(Ctx,
GEP->getType()->getPointerAddressSpace());
6576if (
auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6577// If the base of the struct is an instruction, the new base will be 6578// inserted close to it. 6580if (isa<PHINode>(BaseI))
6582elseif (
InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6584SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6587 NewBaseInsertPt = std::next(BaseI->getIterator());
6589// If the current base is an argument or global value, the new base 6590// will be inserted to the entry block. 6594IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6595// Create a new base. 6596Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6597 NewBaseGEP = OldBase;
6598if (NewBaseGEP->
getType() != I8PtrTy)
6599 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6601 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex,
"splitgep");
6602 NewGEPBases.
insert(NewBaseGEP);
    // Check whether all the offsets can be encoded with preferred common base.
    LargeOffsetGEPs.
front().second, LargeOffsetGEPs.
back().second)) {
6609 BaseOffset = PreferBase;
6610// Create a new base if the offset of the BaseGEP can be decoded with one 6612 createNewBase(BaseOffset, OldBase, BaseGEP);
6615auto *LargeOffsetGEP = LargeOffsetGEPs.
begin();
6616while (LargeOffsetGEP != LargeOffsetGEPs.
end()) {
6618 int64_t
Offset = LargeOffsetGEP->second;
6619if (
Offset != BaseOffset) {
6623// The result type of the GEP might not be the type of the memory 6626GEP->getResultElementType(),
6627GEP->getAddressSpace())) {
6628// We need to create a new base if the offset to the current base is 6629// too large to fit into the addressing mode. So, a very large struct 6630// may be split into several parts. 6633 NewBaseGEP =
nullptr;
6637// Generate a new GEP to replace the current one. 6638Type *PtrIdxTy =
DL->getIndexType(
GEP->getType());
6641// Create a new base if we don't have one yet. Find the insertion 6642// pointer for the new base first. 6643 createNewBase(BaseOffset, OldBase,
GEP);
6647Value *NewGEP = NewBaseGEP;
6648if (
Offset != BaseOffset) {
6649// Calculate the new offset for the new GEP. 6651 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6655 LargeOffsetGEP = LargeOffsetGEPs.
erase(LargeOffsetGEP);
6656GEP->eraseFromParent();
bool CodeGenPrepare::optimizePhiType(
    PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
    SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
  // We are looking for a collection of interconnected phi nodes that together
  // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
  // are of the same type. Convert the whole set of nodes to the type of the
  // bitcast.
  Type *PhiTy = I->getType();
  Type *ConvertTy = nullptr;
  if (Visited.count(I) ||
      (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
    return false;
  // This works by adding extra bitcasts between load/stores and removing
  // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
  // we can get in the situation where we remove a bitcast in one iteration
  // just to add it again in the next. We need to ensure that at least one
  // bitcast we remove is anchored to something that will not change back.
  bool AnyAnchored =
false;
6691while (!Worklist.
empty()) {
6694if (
auto *Phi = dyn_cast<PHINode>(
II)) {
6695// Handle Defs, which might also be PHI's 6696for (
Value *V :
Phi->incoming_values()) {
6697if (
auto *OpPhi = dyn_cast<PHINode>(V)) {
6698if (!PhiNodes.
count(OpPhi)) {
6699if (!Visited.
insert(OpPhi).second)
6704 }
elseif (
auto *OpLoad = dyn_cast<LoadInst>(V)) {
6705if (!OpLoad->isSimple())
6707if (Defs.
insert(OpLoad).second)
6709 }
elseif (
auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6710if (Defs.
insert(OpEx).second)
6712 }
elseif (
auto *OpBC = dyn_cast<BitCastInst>(V)) {
6714 ConvertTy = OpBC->getOperand(0)->getType();
6715if (OpBC->getOperand(0)->getType() != ConvertTy)
6717if (Defs.
insert(OpBC).second) {
6719 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6720 !isa<ExtractElementInst>(OpBC->getOperand(0));
6722 }
elseif (
auto *OpC = dyn_cast<ConstantData>(V))
6729// Handle uses which might also be phi's 6730for (
User *V :
II->users()) {
6731if (
auto *OpPhi = dyn_cast<PHINode>(V)) {
6732if (!PhiNodes.
count(OpPhi)) {
6733if (Visited.
count(OpPhi))
6739 }
elseif (
auto *OpStore = dyn_cast<StoreInst>(V)) {
6740if (!OpStore->isSimple() || OpStore->getOperand(0) !=
II)
6742Uses.insert(OpStore);
6743 }
elseif (
auto *OpBC = dyn_cast<BitCastInst>(V)) {
6745 ConvertTy = OpBC->getType();
6746if (OpBC->getType() != ConvertTy)
6750any_of(OpBC->users(), [](
User *U) { return !isa<StoreInst>(U); });
6757if (!ConvertTy || !AnyAnchored ||
6762 << *ConvertTy <<
"\n");
6764// Create all the new phi nodes of the new type, and bitcast any loads to the 6770if (isa<BitCastInst>(
D)) {
6771 ValMap[
D] =
D->getOperand(0);
6775 ValMap[
D] =
newBitCastInst(
D, ConvertTy,
D->getName() +
".bc", insertPt);
6780Phi->getName() +
".tc",
Phi->getIterator());
6781// Pipe together all the PhiNodes. 6782for (
PHINode *Phi : PhiNodes) {
6783PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6784for (
int i = 0, e =
Phi->getNumIncomingValues(); i < e; i++)
6786Phi->getIncomingBlock(i));
6789// And finally pipe up the stores and bitcasts 6791if (isa<BitCastInst>(U)) {
6795U->setOperand(0,
newBitCastInst(ValMap[
U->getOperand(0)], PhiTy,
"bc",
6800// Save the removed phis to be deleted later. 6802 DeletedInstrs.
insert(Phi);
6806bool CodeGenPrepare::optimizePhiTypes(
Function &
F) {
6814// Attempt to optimize all the phis in the functions to the correct type. 6816for (
auto &Phi : BB.
phis())
6817 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6819// Remove any old phi's that have been converted. 6820for (
auto *
I : DeletedInstrs) {
6822I->eraseFromParent();
6828/// Return true, if an ext(load) can be formed from an extension in 6830bool CodeGenPrepare::canFormExtLd(
6833for (
auto *MovedExtInst : MovedExts) {
6834if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6835 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6836 Inst = MovedExtInst;
6843// If they're already in the same block, there's nothing to do. 6844// Make the cheap checks first if we did not promote. 6845// If we promoted, we need to check if it is indeed profitable. 6852/// Move a zext or sext fed by a load into the same basic block as the load, 6853/// unless conditions are unfavorable. This allows SelectionDAG to fold the 6854/// extend into the load. 6858/// %ld = load i32* %addr 6859/// %add = add nuw i32 %ld, 4 6860/// %zext = zext i32 %add to i64 6864/// %ld = load i32* %addr 6865/// %zext = zext i32 %ld to i64 6866/// %add = add nuw i64 %zext, 4 6868/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which 6869/// allow us to match zext(load i32*) to i64. 6871/// Also, try to promote the computations used to obtain a sign extended 6872/// value used into memory accesses. 6875/// a = add nsw i32 b, 3 6876/// d = sext i32 a to i64 6877/// e = getelementptr ..., i64 d 6881/// f = sext i32 b to i64 6882/// a = add nsw i64 f, 3 6883/// e = getelementptr ..., i64 a 6886/// \p Inst[in/out] the extension may be modified during the process if some 6887/// promotions apply. 6888bool CodeGenPrepare::optimizeExt(
Instruction *&Inst) {
6889bool AllowPromotionWithoutCommonHeader =
false;
6890 /// See if it is an interesting sext operations for the address type 6891 /// promotion before trying to promote it, e.g., the ones with the right 6892 /// type and used in memory accesses. 6894 *Inst, AllowPromotionWithoutCommonHeader);
6895 TypePromotionTransaction TPT(RemovedInsts);
6896 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6897 TPT.getRestorationPoint();
6902bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6904// Look for a load being extended. 6908// Try to promote a chain of computation if it allows to form an extended 6910if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6911assert(LI && ExtFedByLoad &&
"Expect a valid load and extension");
6913// Move the extend into the same block as the load. 6916 Inst = ExtFedByLoad;
6920// Continue promoting SExts if known as considerable depending on targets. 6921if (ATPConsiderable &&
6922 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6923 HasPromoted, TPT, SpeculativelyMovedExts))
6926 TPT.rollback(LastKnownGood);
6930// Perform address type promotion if doing so is profitable. 6931// If AllowPromotionWithoutCommonHeader == false, we should find other sext 6932// instructions that sign extended the same initial value. However, if 6933// AllowPromotionWithoutCommonHeader == true, we expect promoting the 6934// extension is just profitable. 6935bool CodeGenPrepare::performAddressTypePromotion(
6936Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
6937bool HasPromoted, TypePromotionTransaction &TPT,
6939bool Promoted =
false;
6941bool AllSeenFirst =
true;
6942for (
auto *
I : SpeculativelyMovedExts) {
6943Value *HeadOfChain =
I->getOperand(0);
6945 SeenChainsForSExt.
find(HeadOfChain);
6946// If there is an unhandled SExt which has the same header, try to promote 6948if (AlreadySeen != SeenChainsForSExt.
end()) {
6949if (AlreadySeen->second !=
nullptr)
6950 UnhandledExts.
insert(AlreadySeen->second);
6951 AllSeenFirst =
false;
6955if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6956 SpeculativelyMovedExts.size() == 1)) {
6960for (
auto *
I : SpeculativelyMovedExts) {
6961Value *HeadOfChain =
I->getOperand(0);
6962 SeenChainsForSExt[HeadOfChain] =
nullptr;
6963 ValToSExtendedUses[HeadOfChain].push_back(
I);
6965// Update Inst as promotion happen. 6966 Inst = SpeculativelyMovedExts.pop_back_val();
6968// This is the first chain visited from the header, keep the current chain 6969// as unhandled. Defer to promote this until we encounter another SExt 6970// chain derived from the same header. 6971for (
auto *
I : SpeculativelyMovedExts) {
6972Value *HeadOfChain =
I->getOperand(0);
6973 SeenChainsForSExt[HeadOfChain] = Inst;
6978if (!AllSeenFirst && !UnhandledExts.
empty())
6979for (
auto *VisitedSExt : UnhandledExts) {
6980if (RemovedInsts.count(VisitedSExt))
6982 TypePromotionTransaction TPT(RemovedInsts);
6986bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6990for (
auto *
I : Chains) {
6991Value *HeadOfChain =
I->getOperand(0);
6992// Mark this as handled. 6993 SeenChainsForSExt[HeadOfChain] =
nullptr;
6994 ValToSExtendedUses[HeadOfChain].push_back(
I);
7003// If the result of a {s|z}ext and its source are both live out, rewrite all 7004// other uses of the source with result of extension. 7005Value *Src =
I->getOperand(0);
7006if (Src->hasOneUse())
7009// Only do this xform if truncating is free. 7013// Only safe to perform the optimization if the source is also defined in 7015if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->
getParent())
7018bool DefIsLiveOut =
false;
7019for (
User *U :
I->users()) {
7022// Figure out which BB this ext is used in. 7032// Make sure none of the uses are PHI nodes. 7033for (
User *U : Src->users()) {
7038// Be conservative. We don't want this xform to end up introducing 7039// reloads just before load / store instructions. 7040if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7044// InsertedTruncs - Only insert one trunc in each block once. 7047bool MadeChange =
false;
7048for (
Use &U : Src->uses()) {
7051// Figure out which BB this ext is used in. 7056// Both src and def are live in this block. Rewrite the use. 7057Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7059if (!InsertedTrunc) {
7062 InsertedTrunc =
newTruncInst(
I, Src->getType(),
"");
7064 InsertedInsts.insert(InsertedTrunc);
7067// Replace a use of the {s|z}ext source with a use of the result. 7076// Find loads whose uses only use some of the loaded value's bits. Add an "and" 7077// just after the load if the target can fold this into one extload instruction, 7078// with the hope of eliminating some of the other later "and" instructions using 7079// the loaded value. "and"s that are made trivially redundant by the insertion 7080// of the new "and" are removed by this function, while others (e.g. those whose 7081// path from the load goes through a phi) are left for isel to potentially 7114// becomes (after a call to optimizeLoadExt for each load): 7118// x1' = and x1, 0xff 7122// x2' = and x2, 0xff 7127bool CodeGenPrepare::optimizeLoadExt(
LoadInst *Load) {
7128if (!
Load->isSimple() || !
Load->getType()->isIntOrPtrTy())
7131// Skip loads we've already transformed. 7132if (
Load->hasOneUse() &&
7133 InsertedInsts.count(cast<Instruction>(*
Load->user_begin())))
7136// Look at all uses of Load, looking through phis, to determine how many bits 7137// of the loaded value are needed. 7142for (
auto *U :
Load->users())
7143 WorkList.
push_back(cast<Instruction>(U));
7147// If the BitWidth is 0, do not try to optimize the type 7154while (!WorkList.
empty()) {
7157// Break use-def graph loops. 7161// For a PHI node, push all of its users. 7162if (
auto *Phi = dyn_cast<PHINode>(
I)) {
7163for (
auto *U :
Phi->users())
7164 WorkList.
push_back(cast<Instruction>(U));
7168switch (
I->getOpcode()) {
7169case Instruction::And: {
7170auto *AndC = dyn_cast<ConstantInt>(
I->getOperand(1));
7173APInt AndBits = AndC->getValue();
7174 DemandBits |= AndBits;
7175// Keep track of the widest and mask we see. 7176if (AndBits.
ugt(WidestAndBits))
7177 WidestAndBits = AndBits;
7178if (AndBits == WidestAndBits &&
I->getOperand(0) == Load)
7183case Instruction::Shl: {
7184auto *ShlC = dyn_cast<ConstantInt>(
I->getOperand(1));
7188 DemandBits.setLowBits(
BitWidth - ShiftAmt);
7193case Instruction::Trunc: {
7196 DemandBits.setLowBits(TruncBitWidth);
7206uint32_t ActiveBits = DemandBits.getActiveBits();
7207// Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the 7208// target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, 7209// for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but 7210// (and (load x) 1) is not matched as a single instruction, rather as a LDR 7211// followed by an AND. 7212// TODO: Look into removing this restriction by fixing backends to either 7213// return false for isLoadExtLegal for i1 or have them select this pattern to 7214// a single instruction. 7216// Also avoid hoisting if we didn't see any ands with the exact DemandBits 7217// mask, since these are the only ands that will be removed by isel. 7218if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7219 WidestAndBits != DemandBits)
7226// Reject cases that won't be matched as extloads. 7232auto *NewAnd = cast<Instruction>(
7233 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7234// Mark this instruction as "inserted by CGP", so that other 7235// optimizations don't touch it. 7236 InsertedInsts.insert(NewAnd);
7238// Replace all uses of load with new and (except for the use of load in the 7241 NewAnd->setOperand(0, Load);
7243// Remove any and instructions that are now redundant. 7244for (
auto *
And : AndsToMaybeRemove)
7245// Check that the and mask is the same as the one we decided to put on the 7247if (cast<ConstantInt>(
And->getOperand(1))->getValue() == DemandBits) {
7249if (&*CurInstIterator ==
And)
7250 CurInstIterator = std::next(
And->getIterator());
7251And->eraseFromParent();
  // NSW flags may no longer hold.
  for (
auto *Inst : DropFlags)
/// Check if V (an operand of a select instruction) is an expensive instruction
/// that is only used once.
  auto *I = dyn_cast<Instruction>(V);

  // If it's safe to speculatively execute, then it should not have side
  // effects; therefore, it's safe to sink and possibly *not* execute.

/// Returns true if a SelectInst should be turned into an explicit branch.

  // If even a predictable select is cheap, then a branch can't be cheaper.

  // FIXME: This should use the same heuristics as IfConversion to determine
  // whether a select is better represented as a branch.

  // If metadata tells us that the select condition is obviously predictable,
  // then we want to replace the select with a branch.
  uint64_t Max = std::max(TrueWeight, FalseWeight);
  uint64_t Sum = TrueWeight + FalseWeight;

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

  // If a branch is predictable, an out-of-order CPU can avoid blocking on its
  // comparison condition. If the compare has more than one use, there's
  // probably another cmov or setcc around, so it's not worth emitting a branch.
  if (!Cmp || !Cmp->hasOneUse())

  // If either operand of the select is expensive and only needed on one side
  // of the select, we should form a branch.

/// If \p isTrue is true, return the true value of \p SI, otherwise return
/// false value of \p SI. If the true/false value of \p SI is defined by any
/// select instructions in \p Selects, look through the defining select
/// instruction until the true/false value is not defined in \p Selects.
         DefSI = dyn_cast<SelectInst>(V)) {
    assert(DefSI->getCondition() == SI->getCondition() &&
           "The condition of DefSI does not match with SI");
    V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());

  assert(V && "Failed to get select true/false value");
  // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
  // general vector shifts, and (3) the shift amount is a select-of-splatted
  // values, hoist the shifts before the select:
  //   shift Op0, (select Cond, TVal, FVal) -->
  //   select Cond, (shift Op0, TVal), (shift Op0, FVal)
  //
  // This is inverting a generic IR transform when we know that the cost of a
  // general vector shift is more than the cost of 2 shift-by-scalars.
  // We can't do this effectively in SDAG because we may not be able to
  // determine if the select operands are splats from within a basic block.
  Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
  Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
  Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
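  // Illustrative IR for the hoist above (a sketch; value names are made up):
  //
  //   %amt = select i1 %cond, <4 x i32> splat (i32 2), <4 x i32> splat (i32 3)
  //   %r   = shl <4 x i32> %x, %amt
  //
  // becomes
  //
  //   %s2 = shl <4 x i32> %x, splat (i32 2)
  //   %s3 = shl <4 x i32> %x, splat (i32 3)
  //   %r  = select i1 %cond, <4 x i32> %s2, <4 x i32> %s3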
bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
  assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
         "Expected a funnel shift");

  // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
  // than general vector shifts, and (3) the shift amount is select-of-splatted
  // values, hoist the funnel shifts before the select:
  //   fsh Op0, Op1, (select Cond, TVal, FVal) -->
  //   select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
  //
  // This is inverting a generic IR transform when we know that the cost of a
  // general vector shift is more than the cost of 2 shift-by-scalars.
  // We can't do this effectively in SDAG because we may not be able to
  // determine if the select operands are splats from within a basic block.
  Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
  Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
  Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
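  // Same idea in intrinsic form (a sketch; value names are made up):
  //
  //   %amt = select i1 %c, <2 x i32> splat (i32 8), <2 x i32> splat (i32 24)
  //   %r   = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y,
  //                                          <2 x i32> %amt)
  //
  // becomes two fshl calls with constant splat amounts, selected on %c.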
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
  // If the SelectOptimize pass is enabled, selects have already been optimized.

  // Find all consecutive select instructions that share the same condition.
       It != SI->getParent()->end(); ++It) {
    if (I && SI->getCondition() == I->getCondition()) {

  // Increment the current iterator to skip all the rest of select instructions
  // because they will be either "not lowered" or "all lowered" to branch.
  CurInstIterator = std::next(LastSI->getIterator());
  // Examine debug-info attached to the consecutive select instructions. They
  // won't be individually optimised by optimizeInst, so we need to perform
  // DbgVariableRecord maintenance here instead.
    fixupDbgVariableRecordsOnInst(*SI);

  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);

  // Can we convert the 'select' to CF ?
  if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))

  if (SI->getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
    SelectKind = TargetLowering::ScalarValSelect;
  // The DominatorTree needs to be rebuilt by any consumers after this
  // transformation. We simply reset here rather than setting the ModifiedDT
  // flag to avoid restarting the function walk in runOnFunction for each
  // select optimized.

  // Transform a sequence like this:
  //    %cmp = cmp uge i32 %a, %b
  //    %sel = select i1 %cmp, i32 %c, i32 %d
  //
  // Into:
  //    %cmp = cmp uge i32 %a, %b
  //    %cmp.frozen = freeze %cmp
  //    br i1 %cmp.frozen, label %select.true, label %select.false
  //  select.true:
  //    br label %select.end
  //  select.false:
  //    br label %select.end
  //  select.end:
  //    %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
  //
  // %cmp should be frozen, otherwise it may introduce undefined behavior.
  // In addition, we may sink instructions that produce %c or %d from
  // the entry block into the destination(s) of the new branch.
  // If the true or false blocks do not contain a sunken instruction, that
  // block and its branch may be optimized away. In that case, one side of the
  // first branch will point directly to select.end, and the corresponding PHI
  // predecessor block will be the start block.

  // Collect values that go on the true side and the values that go on the
  // false side.
    TrueInstrs.push_back(cast<Instruction>(V));

    FalseInstrs.push_back(cast<Instruction>(V));
7493// Split the select block, according to how many (if any) values go on each 7497// We should split before any debug-info. 7498 SplitPt.setHeadBit(
true);
7501auto *CondFr =
IB.CreateFreeze(
SI->getCondition(),
SI->getName() +
".frozen");
7508if (TrueInstrs.
size() == 0) {
7510 CondFr, SplitPt,
false,
nullptr,
nullptr, LI));
7512 EndBlock = cast<BasicBlock>(FalseBranch->
getOperand(0));
7513 }
elseif (FalseInstrs.
size() == 0) {
7515 CondFr, SplitPt,
false,
nullptr,
nullptr, LI));
7517 EndBlock = cast<BasicBlock>(TrueBranch->
getOperand(0));
7522nullptr,
nullptr, LI);
7523 TrueBranch = cast<BranchInst>(ThenTerm);
7524 FalseBranch = cast<BranchInst>(ElseTerm);
7527 EndBlock = cast<BasicBlock>(TrueBranch->
getOperand(0));
7530 EndBlock->
setName(
"select.end");
7532 TrueBlock->
setName(
"select.true.sink");
7534 FalseBlock->
setName(FalseInstrs.
size() == 0 ?
"select.false" 7535 :
"select.false.sink");
7539 FreshBBs.
insert(TrueBlock);
7541 FreshBBs.
insert(FalseBlock);
7542 FreshBBs.
insert(EndBlock);
7545BFI->setBlockFreq(EndBlock,
BFI->getBlockFreq(StartBlock));
7547staticconstunsigned MD[] = {
7548 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7549 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7552// Sink expensive instructions into the conditional blocks to avoid executing 7553// them speculatively. 7559// If we did not create a new block for one of the 'true' or 'false' paths 7560// of the condition, it means that side of the branch goes to the end block 7561// directly and the path originates from the start block from the point of 7562// view of the new PHI. 7563if (TrueBlock ==
nullptr)
7564 TrueBlock = StartBlock;
7565elseif (FalseBlock ==
nullptr)
7566 FalseBlock = StartBlock;
7569 INS.
insert(ASI.begin(), ASI.end());
7570// Use reverse iterator because later select may use the value of the 7571// earlier select, and we need to propagate value through earlier select 7572// to get the PHI operand. 7574// The select itself is replaced with a PHI Node. 7583SI->eraseFromParent();
7585 ++NumSelectsExpanded;
7588// Instruct OptimizeBlock to skip to the next block. 7589 CurInstIterator = StartBlock->
end();
/// Some targets only accept certain types for splat inputs. For example a VDUP
/// in MVE takes a GPR (integer) register, and the instructions that incorporate
/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.

  // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only

  auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
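  // An example of the accepted pattern (illustrative only):
  //
  //   %ins   = insertelement <4 x float> poison, float %val, i64 0
  //   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
  //                          <4 x i32> zeroinitializer
  //
  // which is rebuilt below as bitcast -> splat -> bitcast so the splatted
  // value can live in a GPR.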
7608"Expected a type of the same size!");
7612// Create a bitcast (shuffle (insert (bitcast(..)))) 7614 Builder.SetInsertPoint(SVI);
7615Value *BC1 = Builder.CreateBitCast(
7616 cast<Instruction>(SVI->
getOperand(0))->getOperand(1), NewType);
7617Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7618Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7622 SVI, TLInfo,
nullptr,
7623 [&](
Value *V) { removeAllAssertingVHReferences(V); });
  // Also hoist the bitcast up to its operand if they are not in the same
  // block.
  if (auto *BCI = dyn_cast<Instruction>(BC1))
    if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
      if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
          !Op->isTerminator() && !Op->isEHPad())
bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
  // If the operands of I can be folded into a target instruction together with
  // I, duplicate and sink them.

  // OpsToSink can contain multiple uses in a use chain (e.g.
  // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
  // uses must come first, so we process the ops in reverse order so as to not
  // create invalid IR.

  unsigned long InstNumber = 0;
  for (const auto &I : *TargetBB)
    InstOrdering[&I] = InstNumber++;

    auto *UI = cast<Instruction>(U->get());
7658if (isa<PHINode>(UI))
7661if (InstOrdering[UI] < InstOrdering[InsertPoint])
7670for (
Use *U : ToReplace) {
7671auto *UI = cast<Instruction>(
U->get());
7675// Now we clone an instruction, its operands' defs may sink to this BB 7676// now. So we put the operands defs' BBs into FreshBBs to do optimization. 7678if (
auto *OpDef = dyn_cast<Instruction>(
Op))
7679 FreshBBs.
insert(OpDef->getParent());
7682 NewInstructions[UI] = NI;
7687 InsertedInsts.insert(NI);
7689// Update the use for the new instruction, making sure that we update the 7690// sunk instruction uses, if it is part of a chain that has already been 7693if (
auto It = NewInstructions.
find(OldI); It != NewInstructions.
end())
7694 It->second->setOperand(
U->getOperandNo(), NI);
7700// Remove instructions that are dead after sinking. 7701for (
auto *
I : MaybeDead) {
7702if (!
I->hasNUsesOrMore(1)) {
7704I->eraseFromParent();
bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())

  // If the register width is greater than the type width, expand the condition
  // of the switch instruction and each case constant to the width of the
  // register. By widening the type of the switch condition, subsequent
  // comparisons (for case comparisons) will not need to be extended to the
  // preferred register width, so we will potentially eliminate N-1 extends,
  // where N is the number of cases in the switch.

  // Extend the switch condition and case constants using the target preferred
  // extend unless the switch condition is a function argument with an extend
  // attribute. In that case, we can avoid an unnecessary mask/extension by
  // matching the argument extension instead.

  // Some targets prefer SExt over ZExt.
    ExtType = Instruction::SExt;
  if (auto *Arg = dyn_cast<Argument>(Cond)) {
    if (Arg->hasSExtAttr())
      ExtType = Instruction::SExt;
    if (Arg->hasZExtAttr())
      ExtType = Instruction::ZExt;

  SI->setCondition(ExtInst);
  for (auto Case : SI->cases()) {
    const APInt &NarrowConst = Case.getCaseValue()->getValue();
    APInt WideConst = (ExtType == Instruction::ZExt)
                          ? NarrowConst.zext(RegWidth)
                          : NarrowConst.sext(RegWidth);
    Case.setValue(ConstantInt::get(Context, WideConst));
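    // Illustrative IR (a sketch, assuming a 32-bit preferred register width):
    //
    //   switch i8 %x, label %def [ i8 1, label %bb1
    //                              i8 2, label %bb2 ]
    //
    // becomes
    //
    //   %x.ext = zext i8 %x to i32
    //   switch i32 %x.ext, label %def [ i32 1, label %bb1
    //                                   i32 2, label %bb2 ]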
bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
  // The SCCP optimization tends to produce code like this:
  //   switch(x) { case 42: phi(42, ...) }
  // Materializing the constant for the phi-argument needs instructions; so we
  // change the code to:
  //   switch(x) { case 42: phi(x, ...) }
  Value *Condition = SI->getCondition();

  // Avoid endless loop in degenerate case.
  if (isa<ConstantInt>(*Condition))

  // Set to true if we previously checked that `CaseBB` is only reached by
  // a single case from this switch.
  bool CheckedForSinglePred = false;

      // If ZExt is free then we can also catch patterns like this:
      //   switch((i32)x) { case 42: phi((i64)42, ...); }
      // and replace `(i64)42` with `zext i32 %x to i64`.
      if (PHIType == ConditionType || TryZExt) {
7793// Set to true to skip this case because of multiple preds. 7794bool SkipCase =
false;
7795Value *Replacement =
nullptr;
7796for (
unsignedI = 0, E =
PHI.getNumIncomingValues();
I != E;
I++) {
7797Value *PHIValue =
PHI.getIncomingValue(
I);
7798if (PHIValue != CaseValue) {
7801ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7807if (
PHI.getIncomingBlock(
I) != SwitchBB)
7809// We cannot optimize if there are multiple case labels jumping to 7810// this block. This check may get expensive when there are many 7811// case labels so we test for it last. 7812if (!CheckedForSinglePred) {
7813 CheckedForSinglePred =
true;
7814if (
SI->findCaseDest(CaseBB) ==
nullptr) {
7820if (Replacement ==
nullptr) {
7821if (PHIValue == CaseValue) {
7822 Replacement = Condition;
7825 Replacement = Builder.CreateZExt(Condition, PHIType);
7828PHI.setIncomingValue(
I, Replacement);
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
  bool Changed = optimizeSwitchType(SI);
  Changed |= optimizeSwitchPhiConstants(SI);

/// Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
/// E.g.,
///   a = vector_op <2 x i32>
///   b = extractelement <2 x i32> a, i32 0
/// =>
///   a = vector_op <2 x i32>
///   c = vector_op a (equivalent to scalar_op on the related lane)
///   * d = extractelement <2 x i32> c, i32 0
///
/// Assuming both extractelement and store can be combined, we get rid of the
/// transition.
class VectorPromoteHelper {
  /// DataLayout associated with the current module.

  /// Used to perform some checks on the legality of vector operations.

  /// Used to estimate the cost of the promoted chain.

  /// The transition being moved downwards.

  /// The sequence of instructions to be promoted.

  /// Cost of combining a store and an extract.
  unsigned StoreExtractCombineCost;

  /// Instruction that will be combined with the transition.

  /// The instruction that represents the current end of the transition.
  /// Since we are faking the promotion until we reach the end of the chain
  /// of computation, we need a way to get the current end of the transition.
    if (InstsToBePromoted.
empty())
7890return InstsToBePromoted.
back();
7893 /// Return the index of the original value in the transition. 7894 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, 7895 /// c, is at index 0. 7896unsigned getTransitionOriginalValueIdx()
const{
7897assert(isa<ExtractElementInst>(Transition) &&
7898"Other kind of transitions are not supported yet");
7902 /// Return the index of the index in the transition. 7903 /// E.g., for "extractelement <2 x i32> c, i32 0" the index 7905unsigned getTransitionIdx()
const{
7906assert(isa<ExtractElementInst>(Transition) &&
7907"Other kind of transitions are not supported yet");
7911 /// Get the type of the transition. 7912 /// This is the type of the original value. 7913 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the 7914 /// transition is <2 x i32>. 7915Type *getTransitionType()
const{
7919 /// Promote \p ToBePromoted by moving \p Def downward through. 7920 /// I.e., we have the following sequence: 7921 /// Def = Transition <ty1> a to <ty2> 7922 /// b = ToBePromoted <ty2> Def, ... 7924 /// b = ToBePromoted <ty1> a, ... 7925 /// Def = Transition <ty1> ToBePromoted to <ty2> 7928 /// Check whether or not it is profitable to promote all the 7929 /// instructions enqueued to be promoted. 7930bool isProfitableToPromote() {
7931Value *ValIdx = Transition->
getOperand(getTransitionOriginalValueIdx());
7932unsignedIndex = isa<ConstantInt>(ValIdx)
7933 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7935Type *PromotedType = getTransitionType();
7938unsigned AS =
ST->getPointerAddressSpace();
7939// Check if this store is supported. 7943// If this is not supported, there is no way we can combine 7944// the extract with the store. 7948// The scalar chain of computation has to pay for the transition 7950// The vector chain has to account for the combining cost. 7956for (
constauto &Inst : InstsToBePromoted) {
7958// By construction, all instructions being promoted are arithmetic ones. 7959// Moreover, one argument is a constant that can be viewed as a splat 7962bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7963 isa<ConstantFP>(Arg0);
7976dbgs() <<
"Estimated cost of computation to be promoted:\nScalar: " 7977 << ScalarCost <<
"\nVector: " << VectorCost <<
'\n');
7978return ScalarCost > VectorCost;
7981 /// Generate a constant vector with \p Val with the same 7982 /// number of elements as the transition. 7983 /// \p UseSplat defines whether or not \p Val should be replicated 7984 /// across the whole vector. 7985 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, 7986 /// otherwise we generate a vector with as many poison as possible: 7987 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only 7988 /// used at the index of the extract. 7990unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7992// If we cannot determine where the constant must be, we have to 7993// use a splat constant. 7995if (
ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8001ElementCountEC = cast<VectorType>(getTransitionType())->getElementCount();
8005if (!
EC.isScalable()) {
8008for (
unsignedIdx = 0;
Idx !=
EC.getKnownMinValue(); ++
Idx) {
8009if (
Idx == ExtractIdx)
8017"Generate scalable vector for non-splat is unimplemented");
8020 /// Check if promoting to a vector type an operand at \p OperandIdx 8021 /// in \p Use can trigger undefined behavior. 8023unsigned OperandIdx) {
8024// This is not safe to introduce undef when the operand is on 8025// the right hand side of a division-like instruction. 8028switch (
Use->getOpcode()) {
8031case Instruction::SDiv:
8032case Instruction::UDiv:
8033case Instruction::SRem:
8034case Instruction::URem:
8036case Instruction::FDiv:
8037case Instruction::FRem:
8038return !
Use->hasNoNaNs();
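  // For example (illustrative): if the transition feeds the divisor of a
  // "udiv i32 %a, %t", promoting would have to materialize the unused vector
  // lanes of the divisor; filling them with undef/poison could create a
  // divide-by-undef, so a splat of the constant is used instead (see the
  // UseSplat handling below).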
8046unsigned CombineCost)
8047 :
DL(
DL), TLI(TLI),
TTI(
TTI), Transition(Transition),
8048 StoreExtractCombineCost(CombineCost) {
8049assert(Transition &&
"Do not know how to promote null");
8052 /// Check if we can promote \p ToBePromoted to \p Type. 8053bool canPromote(
constInstruction *ToBePromoted)
const{
8054// We could support CastInst too. 8055return isa<BinaryOperator>(ToBePromoted);
8058 /// Check if it is profitable to promote \p ToBePromoted 8059 /// by moving downward the transition through. 8060bool shouldPromote(
constInstruction *ToBePromoted)
const{
8061// Promote only if all the operands can be statically expanded. 8062// Indeed, we do not want to introduce any new kind of transitions. 8065if (Val == getEndOfTransition()) {
8066// If the use is a division and the transition is on the rhs, 8067// we cannot promote the operation, otherwise we may create a 8069if (canCauseUndefinedBehavior(ToBePromoted,
U.getOperandNo()))
8073if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8074 !isa<ConstantFP>(Val))
8077// Check that the resulting operation is legal. 8083 ISDOpcode, TLI.
getValueType(DL, getTransitionType(),
true));
8086 /// Check whether or not \p Use can be combined 8087 /// with the transition. 8088 /// I.e., is it possible to do Use(Transition) => AnotherUse? 8091 /// Record \p ToBePromoted as part of the chain to be promoted. 8092void enqueueForPromotion(
Instruction *ToBePromoted) {
8093 InstsToBePromoted.push_back(ToBePromoted);
8096 /// Set the instruction that will be combined with the transition. 8097void recordCombineInstruction(
Instruction *ToBeCombined) {
8099 CombineInst = ToBeCombined;
8102 /// Promote all the instructions enqueued for promotion if it is 8104 /// \return True if the promotion happened, false otherwise. 8106// Check if there is something to promote. 8107// Right now, if we do not have anything to combine with, 8108// we assume the promotion is not profitable. 8109if (InstsToBePromoted.empty() || !CombineInst)
8117for (
auto &ToBePromoted : InstsToBePromoted)
8118 promoteImpl(ToBePromoted);
8119 InstsToBePromoted.clear();
8124}
// end anonymous namespace 8126void VectorPromoteHelper::promoteImpl(
Instruction *ToBePromoted) {
8127// At this point, we know that all the operands of ToBePromoted but Def 8128// can be statically promoted. 8129// For Def, we need to use its parameter in ToBePromoted: 8130// b = ToBePromoted ty1 a 8131// Def = Transition ty1 b to ty2 8132// Move the transition down. 8133// 1. Replace all uses of the promoted operation by the transition. 8134// = ... b => = ... Def. 8136"The type of the result of the transition does not match " 8139// 2. Update the type of the uses. 8140// b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. 8141Type *TransitionTy = getTransitionType();
8143// 3. Update all the operands of the promoted operation with promoted 8145// b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. 8148Value *NewVal =
nullptr;
8149if (Val == Transition)
8150 NewVal = Transition->
getOperand(getTransitionOriginalValueIdx());
8151elseif (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8152 isa<ConstantFP>(Val)) {
8153// Use a splat constant if it is not safe to use undef. 8155 cast<Constant>(Val),
8156 isa<UndefValue>(Val) ||
8157 canCauseUndefinedBehavior(ToBePromoted,
U.getOperandNo()));
8161 ToBePromoted->
setOperand(
U.getOperandNo(), NewVal);
8164 Transition->
setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8167/// Some targets can do store(extractelement) with one instruction. 8168/// Try to push the extractelement towards the stores when the target 8169/// has this feature and this is profitable. 8170bool CodeGenPrepare::optimizeExtractElementInst(
Instruction *Inst) {
8171unsigned CombineCost = std::numeric_limits<unsigned>::max();
8178// At this point we know that Inst is a vector to scalar transition. 8179// Try to move it down the def-use chain, until: 8180// - We can combine the transition with its single use 8181// => we got rid of the transition. 8182// - We escape the current basic block 8183// => we would need to check that we are moving it at a cheaper place and 8184// we do not do that for now. 8186LLVM_DEBUG(
dbgs() <<
"Found an interesting transition: " << *Inst <<
'\n');
8187 VectorPromoteHelper VPH(*
DL, *TLI, *
TTI, Inst, CombineCost);
8188// If the transition has more than one use, assume this is not going to be 8194if (ToBePromoted->
getParent() != Parent) {
8195LLVM_DEBUG(
dbgs() <<
"Instruction to promote is in a different block (" 8197 <<
") than the transition (" << Parent->
getName()
8202if (VPH.canCombine(ToBePromoted)) {
8204 <<
"will be combined with: " << *ToBePromoted <<
'\n');
8205 VPH.recordCombineInstruction(ToBePromoted);
8206bool Changed = VPH.promote();
8207 NumStoreExtractExposed += Changed;
8212if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8215LLVM_DEBUG(
dbgs() <<
"Promoting is possible... Enqueue for promotion!\n");
8217 VPH.enqueueForPromotion(ToBePromoted);
8218 Inst = ToBePromoted;
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
///   (store (or (zext (bitcast F to i32) to i64),
///              (shl (zext I to i64), 32)), addr)  -->
///   (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged stores can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
///   void goo(const std::pair<int, float> &);
///   ...
///   goo(std::make_pair(tmp, ftmp));
///
/// Although we already have similar splitting in DAG Combine, we duplicate
/// it in CodeGenPrepare to catch the case in which pattern is across
/// multiple BBs. The logic in DAG Combine is kept to catch case generated
/// during code expansion.

  // Handle simple but common cases only.
  Type *StoreType = SI.getValueOperand()->getType();

  // The code below assumes shifting a value by <number of bits>,
  // whereas scalable vectors would have to be shifted by
  // <2log(vscale) + number of bits> in order to store the
  // low/high parts. Bailing out for now.

  if (!DL.typeSizeEqualsStoreSize(StoreType) ||
      DL.getTypeSizeInBits(StoreType) == 0)

  unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;

  if (!DL.typeSizeEqualsStoreSize(SplitStoreType))

  // Don't split the store if it is volatile.

  // Match the following patterns:
  // (store (or (zext LValue to i64),
  //            (shl (zext HValue to i64), 32)), HalfValBitSize)
  //  or
  // (store (or (shl (zext HValue to i64), 32)),
  //            (zext LValue to i64)), HalfValBitSize)
  // Expect both operands of OR and the first operand of SHL have only
  // one use.
  if (!match(SI.getValueOperand(),

  // Check LValue and HValue are int with size less than or equal to 32.
  if (!LValue->getType()->isIntegerTy() ||
      DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
      DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)

  // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
  // as the input of target query.
  auto *LBC = dyn_cast<BitCastInst>(LValue);
8305auto *HBC = dyn_cast<BitCastInst>(HValue);
8313// Start to split store. 8317// If LValue/HValue is a bitcast in another BB, create a new one in current 8318// BB so it may be merged with the splitted stores by dag combiner. 8319if (LBC && LBC->getParent() != SI.getParent())
8321if (HBC && HBC->getParent() != SI.getParent())
8322 HValue = Builder.
CreateBitCast(HBC->getOperand(0), HBC->getType());
8324bool IsLE = SI.getDataLayout().isLittleEndian();
8325auto CreateSplitStore = [&](
Value *V,
boolUpper) {
8328Align Alignment = SI.getAlign();
8329constbool IsOffsetStore = (IsLE &&
Upper) || (!IsLE && !
Upper);
8332 SplitStoreType,
Addr,
8335// When splitting the store in half, naturally one half will retain the 8336// alignment of the original wider store, regardless of whether it was 8337// over-aligned or not, while the other will require adjustment. 8343 CreateSplitStore(
LValue,
false);
8344 CreateSplitStore(HValue,
true);
8346// Delete the old store. 8347 SI.eraseFromParent();
8351// Return true if the GEP has two operands, the first operand is of a sequential 8352// type, and the second operand is a constant. 8355returnGEP->getNumOperands() == 2 &&
I.isSequential() &&
8356 isa<ConstantInt>(
GEP->getOperand(1));
8359// Try unmerging GEPs to reduce liveness interference (register pressure) across 8360// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, 8361// reducing liveness interference across those edges benefits global register 8362// allocation. Currently handles only certain cases. 8364// For example, unmerge %GEPI and %UGEPI as below. 8366// ---------- BEFORE ---------- 8371// %GEPI = gep %GEPIOp, Idx 8373// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] 8374// (* %GEPI is alive on the indirectbr edges due to other uses ahead) 8375// (* %GEPIOp is alive on the indirectbr edges only because of it's used by 8378// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged) 8379// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged) 8384// %UGEPI = gep %GEPIOp, UIdx 8386// --------------------------- 8388// ---------- AFTER ---------- 8390// ... (same as above) 8391// (* %GEPI is still alive on the indirectbr edges) 8392// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the 8398// %UGEPI = gep %GEPI, (UIdx-Idx) 8400// --------------------------- 8402// The register pressure on the IndirectBr edges is reduced because %GEPIOp is 8403// no longer alive on them. 8405// We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging 8406// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as 8407// not to disable further simplications and optimizations as a result of GEP 8410// Note this unmerging may increase the length of the data flow critical path 8411// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff 8412// between the register pressure and the length of data-flow critical 8413// path. Restricting this to the uncommon IndirectBr case would minimize the 8414// impact of potentially longer critical path, if any, and the impact on compile 8419// Check that SrcBlock ends with an IndirectBr. If not, give up. The common 8420// (non-IndirectBr) cases exit early here. 8423// Check that GEPI is a simple gep with a single constant index. 8427// Check that GEPI is a cheap one. 8433// Check that GEPIOp is an instruction that's also defined in SrcBlock. 8434if (!isa<Instruction>(GEPIOp))
8436auto *GEPIOpI = cast<Instruction>(GEPIOp);
8437if (GEPIOpI->getParent() != SrcBlock)
8439// Check that GEP is used outside the block, meaning it's alive on the 8440// IndirectBr edge(s). 8442 if (auto *I = dyn_cast<Instruction>(Usr)) {
8443 if (I->getParent() != SrcBlock) {
8450// The second elements of the GEP chains to be unmerged. 8451 std::vector<GetElementPtrInst *> UGEPIs;
8452// Check each user of GEPIOp to check if unmerging would make GEPIOp not alive 8453// on IndirectBr edges. 8457// Check if Usr is an Instruction. If not, give up. 8458if (!isa<Instruction>(Usr))
8460auto *UI = cast<Instruction>(Usr);
8461// Check if Usr in the same block as GEPIOp, which is fine, skip. 8464// Check if Usr is a GEP. If not, give up. 8465if (!isa<GetElementPtrInst>(Usr))
8467auto *UGEPI = cast<GetElementPtrInst>(Usr);
8468// Check if UGEPI is a simple gep with a single constant index and GEPIOp is 8469// the pointer operand to it. If so, record it in the vector. If not, give 8473if (UGEPI->getOperand(0) != GEPIOp)
8475if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8477if (GEPIIdx->getType() !=
8478 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8480ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8485 UGEPIs.push_back(UGEPI);
8487if (UGEPIs.size() == 0)
8489// Check the materializing cost of (Uidx-Idx). 8491ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8498// Now unmerge between GEPI and UGEPIs. 8500 UGEPI->setOperand(0, GEPI);
8501ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8502Constant *NewUGEPIIdx = ConstantInt::get(
8503 GEPIIdx->getType(), UGEPIIdx->
getValue() - GEPIIdx->getValue());
8504 UGEPI->setOperand(1, NewUGEPIIdx);
8505// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not 8506// inbounds to avoid UB. 8507if (!GEPI->isInBounds()) {
8508 UGEPI->setIsInBounds(
false);
8511// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not 8512// alive on IndirectBr edges). 8515 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8517"GEPIOp is used outside SrcBlock");
//   %c = icmp ult %x, 8
//   ...
//   %c = icmp eq %tc, 0
//
// Creating the cmp to zero can be better for the backend, especially if the
// lshr produces flags that can be used automatically.

  ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
  if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())

  Value *X = Cmp->getOperand(0);
  APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();

  for (auto *U : X->users()) {
    // A quick dominance check
        (UI->getParent() != Branch->getParent() &&
         UI->getParent() != Branch->getSuccessor(0) &&
         UI->getParent() != Branch->getSuccessor(1)) ||
        (UI->getParent() != Branch->getParent() &&
         !UI->getParent()->getSinglePredecessor()))

    if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
      if (UI->getParent() != Branch->getParent())
          ConstantInt::get(UI->getType(), 0));

    if (Cmp->isEquality() &&
      if (UI->getParent() != Branch->getParent())
          ConstantInt::get(UI->getType(), 0));
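// Illustrative IR for the power-of-2 case above (a sketch; names are made up):
//
//   %c = icmp ult i32 %x, 8
//   br i1 %c, label %t, label %f
//   ...
//   %s = lshr i32 %x, 3
//
// can instead branch on the shifted value:
//
//   %s = lshr i32 %x, 3
//   %c = icmp eq i32 %s, 0
//   br i1 %c, label %t, label %f
//
// since %x u< 8 holds exactly when %x >> 3 == 0.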
8586bool CodeGenPrepare::optimizeInst(
Instruction *
I, ModifyDT &ModifiedDT) {
8587bool AnyChange =
false;
8588 AnyChange = fixupDbgVariableRecordsOnInst(*
I);
8590// Bail out if we inserted the instruction to prevent optimizations from 8591// stepping on each other's toes. 8592if (InsertedInsts.count(
I))
8595// TODO: Move into the switch on opcode below here. 8597// It is possible for very late stage optimizations (such as SimplifyCFG) 8598// to introduce PHI nodes too late to be cleaned up. If we detect such a 8599// trivial PHI, go ahead and zap it here. 8601 LargeOffsetGEPMap.erase(
P);
8603P->eraseFromParent();
8610if (
CastInst *CI = dyn_cast<CastInst>(
I)) {
8611// If the source of the cast is a constant, then this should have 8612// already been constant folded. The only reason NOT to constant fold 8613// it is if something (e.g. LSR) was careful to place the constant 8614// evaluation in a block other than then one that uses it (e.g. to hoist 8615// the address of globals out of a loop). If this is the case, we don't 8616// want to forward-subst the cast. 8623if ((isa<UIToFPInst>(
I) || isa<SIToFPInst>(
I) || isa<FPToUIInst>(
I) ||
8624 isa<TruncInst>(
I)) &&
8626I, LI->getLoopFor(
I->getParent()), *
TTI))
8629if (isa<ZExtInst>(
I) || isa<SExtInst>(
I)) {
8630 /// Sink a zext or sext into its user blocks if the target type doesn't 8631 /// fit in one register 8634 TargetLowering::TypeExpandInteger) {
8638I, LI->getLoopFor(
I->getParent()), *
TTI))
8641bool MadeChange = optimizeExt(
I);
8642return MadeChange | optimizeExtUses(
I);
8648if (
auto *Cmp = dyn_cast<CmpInst>(
I))
8649if (optimizeCmp(Cmp, ModifiedDT))
8656if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
8657 LI->
setMetadata(LLVMContext::MD_invariant_group,
nullptr);
8667SI->setMetadata(LLVMContext::MD_invariant_group,
nullptr);
8668unsigned AS =
SI->getPointerAddressSpace();
8669return optimizeMemoryInst(
I,
SI->getOperand(1),
8670SI->getOperand(0)->getType(), AS);
8674unsigned AS = RMW->getPointerAddressSpace();
8675return optimizeMemoryInst(
I, RMW->getPointerOperand(), RMW->getType(), AS);
8679unsigned AS = CmpX->getPointerAddressSpace();
8680return optimizeMemoryInst(
I, CmpX->getPointerOperand(),
8681 CmpX->getCompareOperand()->getType(), AS);
8690// TODO: Move this into the switch on opcode - it handles shifts already. 8691if (BinOp && (BinOp->
getOpcode() == Instruction::AShr ||
8692 BinOp->
getOpcode() == Instruction::LShr)) {
8700if (GEPI->hasAllZeroIndices()) {
8701 /// The GEP operand must be a pointer, so must its result -> BitCast 8703 GEPI->getName(), GEPI->getIterator());
8704NC->setDebugLoc(GEPI->getDebugLoc());
8707 GEPI, TLInfo,
nullptr,
8708 [&](
Value *V) { removeAllAssertingVHReferences(V); });
8710 optimizeInst(
NC, ModifiedDT);
8719// freeze(icmp a, const)) -> icmp (freeze a), const 8720// This helps generate efficient conditional jumps. 8722if (
ICmpInst *
II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8724elseif (
FCmpInst *
F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8725 CmpI =
F->getFastMathFlags().none() ?
F :
nullptr;
8729bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8730 isa<ConstantPointerNull>(Op0);
8731bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8732 isa<ConstantPointerNull>(Op1);
8733if (Const0 || Const1) {
8734if (!Const0 || !Const1) {
8740 FI->eraseFromParent();
  if (tryToSinkFreeOperands(I))

  switch (I->getOpcode()) {
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return optimizeShiftInst(cast<BinaryOperator>(I));
  case Instruction::Call:
  case Instruction::Select:
    return optimizeSelectInst(cast<SelectInst>(I));
  case Instruction::ShuffleVector:
    return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
  case Instruction::Switch:
    return optimizeSwitchInst(cast<SwitchInst>(I));
  case Instruction::ExtractElement:
    return optimizeExtractElementInst(cast<ExtractElementInst>(I));
  case Instruction::Br:
8772/// Given an OR instruction, check to see if this is a bitreverse 8773/// idiom. If so, insert the new intrinsic and return true. 8775if (!
I.getType()->isIntegerTy() ||
8787 [&](
Value *V) { removeAllAssertingVHReferences(V); });
8791// In this pass we look for GEP and cast instructions that are used 8792// across basic blocks and rewrite them to improve basic-block-at-a-time 8794bool CodeGenPrepare::optimizeBlock(
BasicBlock &BB, ModifyDT &ModifiedDT) {
8796bool MadeChange =
false;
8799 CurInstIterator = BB.
begin();
8800 ModifiedDT = ModifyDT::NotModifyDT;
8801while (CurInstIterator != BB.
end()) {
8802 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8803if (ModifiedDT != ModifyDT::NotModifyDT) {
8804// For huge function we tend to quickly go though the inner optmization 8805// opportunities in the BB. So we go back to the BB head to re-optimize 8806// each instruction instead of go back to the function head. 8816 }
while (ModifiedDT == ModifyDT::ModifyInstDT);
8818bool MadeBitReverse =
true;
8819while (MadeBitReverse) {
8820 MadeBitReverse =
false;
8822if (makeBitReverse(
I)) {
8823 MadeBitReverse = MadeChange =
true;
8828 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8833// Some CGP optimizations may move or alter what's computed in a block. Check 8834// whether a dbg.value intrinsic could be pointed at a more appropriate operand. 8839// Does this dbg.value refer to a sunk address calculation? 8840bool AnyChange =
false;
8843for (
Value *Location : LocationOps) {
8847// Point dbg.value at locally computed address, which should give the best 8848// opportunity to be accurately lowered. This update may change the type 8849// of pointer being referred to; however this makes no difference to 8850// debugging information, and we can't generate bitcasts that may affect 8859bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(
Instruction &
I) {
8860bool AnyChange =
false;
8862 AnyChange |= fixupDbgVariableRecord(DVR);
8866// FIXME: should updating debug-info really cause the "changed" flag to fire, 8867// which can cause a function to be reprocessed? 8869if (DVR.
Type != DbgVariableRecord::LocationType::Value &&
8870 DVR.
Type != DbgVariableRecord::LocationType::Assign)
8873// Does this DbgVariableRecord refer to a sunk address calculation? 8874bool AnyChange =
false;
8877for (
Value *Location : LocationOps) {
8881// Point dbg.value at locally computed address, which should give the best 8882// opportunity to be accurately lowered. This update may change the type 8883// of pointer being referred to; however this makes no difference to 8884// debugging information, and we can't generate bitcasts that may affect 8895if (isa<PHINode>(VI))
8896 DVI->
insertBefore(VI->getParent()->getFirstInsertionPt());
8904if (isa<PHINode>(VI))
8910// A llvm.dbg.value may be using a value before its definition, due to 8911// optimizations in this pass and others. Scan for such dbg.values, and rescue 8912// them by moving the dbg.value to immediately after the value definition. 8913// FIXME: Ideally this should never be necessary, and this has the potential 8914// to re-order dbg.value intrinsics. 8915bool CodeGenPrepare::placeDbgValues(
Function &
F) {
8916bool MadeChange =
false;
8919auto DbgProcessor = [&](
auto *DbgItem,
Instruction *Position) {
8921for (
Value *V : DbgItem->location_ops())
8922if (
Instruction *VI = dyn_cast_or_null<Instruction>(V))
8925// This item may depend on multiple instructions, complicating any 8926// potential sink. This block takes the defensive approach, opting to 8927// "undef" the item if it has more than one instruction and any of them do 8930if (
VI->isTerminator())
8933// If VI is a phi in a block with an EHPad terminator, we can't insert 8935if (isa<PHINode>(VI) &&
VI->getParent()->getTerminator()->isEHPad())
8938// If the defining instruction dominates the dbg.value, we do not need 8939// to move the dbg.value. 8943// If we depend on multiple instructions and any of them doesn't 8944// dominate this DVI, we probably can't salvage it: moving it to 8945// after any of the instructions could cause us to lose the others. 8946if (VIs.size() > 1) {
8949 <<
"Unable to find valid location for Debug Value, undefing:\n" 8951 DbgItem->setKillLocation();
8956 << *DbgItem <<
' ' << *VI);
8965// Process dbg.value intrinsics. 8968 DbgProcessor(DVI, DVI);
8972// If this isn't a dbg.value, process any attached DbgVariableRecord 8973// records attached to this instruction. 8976if (DVR.
Type != DbgVariableRecord::LocationType::Value)
8978 DbgProcessor(&DVR, &
Insn);
// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
// probes can be chained dependencies of other regular DAG nodes and block DAG
// combine optimizations.
bool CodeGenPrepare::placePseudoProbes(Function &F) {
  bool MadeChange = false;

    // Move the rest of the probes to the beginning of the block.
    auto FirstInst = Block.getFirstInsertionPt();
    while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())

      if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
        II->moveBefore(FirstInst);
/// Scale down both weights to fit into uint32_t.
  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
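  // For example (illustrative numbers): NewTrue = 2^33 and NewFalse = 2^31
  // give Scale = (2^33 / (2^32 - 1)) + 1 = 3, so the scaled weights become
  // 2^33 / 3 and 2^31 / 3, both of which fit in uint32_t while keeping roughly
  // the original 4:1 ratio.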
/// Some targets prefer to split a conditional branch like:
///
///   %0 = icmp ne i32 %a, 0
///   %1 = icmp ne i32 %b, 0
///   %or.cond = or i1 %0, %1
///   br i1 %or.cond, label %TrueBB, label %FalseBB
///
/// into multiple branch instructions like:
///
///   %0 = icmp ne i32 %a, 0
///   br i1 %0, label %TrueBB, label %bb2
///
///   %1 = icmp ne i32 %b, 0
///   br i1 %1, label %TrueBB, label %FalseBB
///
/// This usually allows instruction selection to do even further optimizations
/// and combine the compare with the branch instruction. Currently this is
/// applied for targets which have "cheap" jump instructions.
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9042bool MadeChange =
false;
9044// Does this BB end with the following? 9045// %cond1 = icmp|fcmp|binary instruction ... 9046// %cond2 = icmp|fcmp|binary instruction ... 9047// %cond.or = or|and i1 %cond1, cond2 9048// br i1 %cond.or label %dest1, label %dest2" 9056if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9059// The merging of mostly empty BB can cause a degenerate branch. 9064Value *Cond1, *Cond2;
9067 Opc = Instruction::And;
9070 Opc = Instruction::Or;
9080if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9092// Update original basic block by using the first condition directly by the 9093// branch instruction and removing the no longer needed and/or instruction. 9094 Br1->setCondition(Cond1);
9097// Depending on the condition we have to either replace the true or the 9098// false successor of the original branch instruction. 9099if (Opc == Instruction::And)
9100 Br1->setSuccessor(0, TmpBB);
9102 Br1->setSuccessor(1, TmpBB);
9104// Fill in the new basic block. 9106if (
auto *
I = dyn_cast<Instruction>(Cond2)) {
9107I->removeFromParent();
9108I->insertBefore(Br2->getIterator());
9111// Update PHI nodes in both successors. The original BB needs to be 9112// replaced in one successor's PHI nodes, because the branch comes now from 9113// the newly generated BB (NewBB). In the other successor we need to add one 9114// incoming edge to the PHI nodes, because both branch instructions target 9115// now the same successor. Depending on the original branch condition 9116// (and/or) we have to swap the successors (TrueDest, FalseDest), so that 9117// we perform the correct update for the PHI nodes. 9118// This doesn't change the successor order of the just created branch 9119// instruction (or any other instruction). 9120if (Opc == Instruction::Or)
9123// Replace the old BB with the new BB. 9126// Add another incoming edge from the new BB. 9132// Update the branch weights (from SelectionDAGBuilder:: 9133// FindMergedConditions). 9134if (Opc == Instruction::Or) {
      // We have flexibility in setting Prob for BB1 and Prob for NewBB.
      // The requirement is that
      //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
      //     = TrueProb for original BB.
      // Assuming the original weights are A and B, one choice is to set BB1's
      // weights to A and A+2B, and set TmpBB's weights to A and 2B. This
      // choice assumes that
      //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
      // Another choice is to assume TrueProb for BB1 equals to TrueProb for
      // TmpBB, but the math is more complicated.
      uint64_t NewTrueWeight = TrueWeight;
      uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
      Br1->setMetadata(LLVMContext::MD_prof,

      NewTrueWeight = TrueWeight;
      NewFalseWeight = 2 * FalseWeight;
      Br2->setMetadata(LLVMContext::MD_prof,
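      // Worked check of the choice above (illustrative): with original weights
      // A (true) and B (false), BB1 gets {A, A+2B} and TmpBB gets {A, 2B}, so
      //   TrueProb(BB1)                    = A / (2A + 2B)
      //   FalseProb(BB1) * TrueProb(TmpBB) = (A+2B)/(2A+2B) * A/(A+2B)
      //                                    = A / (2A + 2B)
      // and their sum is A / (A + B), the TrueProb of the original block.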
      // This requires creation of TmpBB after CurBB.

      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
      // The requirement is that
      //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
      //     = FalseProb for original BB.
      // Assuming the original weights are A and B, one choice is to set BB1's
      // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This
      // choice assumes that
      //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
      uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
      uint64_t NewFalseWeight = FalseWeight;
      Br1->setMetadata(LLVMContext::MD_prof,

      NewTrueWeight = 2 * TrueWeight;
      NewFalseWeight = FalseWeight;
      Br2->setMetadata(LLVMContext::MD_prof,

  ModifiedDT = ModifyDT::ModifyBBDT;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, const TargetTransformInfo &TTI, const DataLayout &DL)
Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1.
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified value is defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
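A hedged source-level analogue of that despeculation, assuming the target reports the count as expensive; slowCttz is a hypothetical stand-in for a cttz intrinsic whose zero input is defined.

    #include <cstdint>

    unsigned slowCttz(uint32_t X); // hypothetical expensive count-trailing-zeros

    // Branch around the expensive count instead of letting it be speculated:
    // the zero input takes the cheap constant path.
    unsigned despeculatedCttz(uint32_t X) {
      if (X == 0)
        return 32;
      return slowCttz(X);
    }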
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
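A source-level illustration (not the matcher itself) of the kind of constant edge case involved: adding 1 overflows exactly when the operand is already all-ones. Both helper names are hypothetical.

    #include <cstdint>

    // Edge-case form: adding 1 overflows iff the operand is already all-ones.
    bool addOneOverflows(uint32_t A)        { return A == UINT32_MAX; }
    // Generic wraparound form of the same check.
    bool addOneOverflowsGeneric(uint32_t A) { return A + 1u < A; }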
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
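A hedged source-level sketch of the idea this helper's name suggests, assuming the remainder operand is a unit-stride induction variable; use() is a hypothetical consumer.

    void use(unsigned V); // hypothetical consumer of the remainder

    // Replace "I % N" on a unit-stride induction variable with a counter that
    // wraps, avoiding a urem on every iteration.
    void remOfLoopIncrement(unsigned N, unsigned Trip) {
      for (unsigned I = 0, Rem = 0; I < Trip; ++I) {
        use(Rem);          // stands in for the original I % N
        if (++Rem == N)
          Rem = 0;
      }
    }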
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bit signed integer value.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
unsigned logBase2() const
APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
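A small sketch exercising the APInt queries listed above; the helper names are illustrative only, not part of the pass.

    #include "llvm/ADT/APInt.h"

    using namespace llvm;

    // Does V fit in a 32-bit signed integer? Equivalent to asking whether
    // getSignificantBits() is at most 32.
    static bool fitsInSigned32(const APInt &V) { return V.isSignedIntN(32); }

    // Widen V by 8 bits, preserving its signed value.
    static APInt widenSigned8(const APInt &V) { return V.sext(V.getBitWidth() + 8); }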
an instruction to allocate memory on the stack
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
A constant value that is initialized with an expression using other constant values.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Value * createIsFPClass(Value *FPNum, unsigned Test)
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
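A short sketch of the IRBuilder calls listed above; emitIsZeroSelect is a hypothetical helper, not something defined by CodeGenPrepare.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // Append "icmp eq X, 0" and a select between TrueV/FalseV to the end of BB.
    static Value *emitIsZeroSelect(BasicBlock *BB, Value *X, Value *TrueV,
                                   Value *FalseV) {
      IRBuilder<> Builder(BB); // insert point: the end of BB
      Value *IsZero =
          Builder.CreateICmpEQ(X, Constant::getNullValue(X->getType()));
      return Builder.CreateSelect(IsZero, TrueV, FalseV, "sel.iszero");
    }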
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
const Function * getFunction() const
Return the function this instruction belongs to.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
iterator find(const KeyT &Key)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
PointerIntPair - This class implements a pair of a pointer and small integer.
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return true if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if the target has indicated that at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns the preferred type for a switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
@ OK_UniformConstantValue
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVMContext & getContext() const
All values hold a context through their type.
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
user_iterator_impl< User > user_iterator
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
void dump() const
Support for debugging, callable in GDB: V->dump()
Value handle that is nullable, but tries to track the Value.
bool pointsToAliveValue() const
This class represents zero extension of integer types.
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
constexpr bool isNonZero() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
StructType * getStructTypeOrNull() const
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match only the specified value V.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
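A brief sketch of the matchers listed above; matchCmpWithConstant is illustrative only, and follows the m_ICmp/m_APInt signatures shown here.

    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Recognise "icmp <pred> X, C" where C is a constant integer (or splat),
    // binding X, the predicate, and the APInt value.
    static bool matchCmpWithConstant(Value *V, Value *&X, CmpPredicate &Pred,
                                     const APInt *&C) {
      return match(V, m_ICmp(Pred, m_Value(X), m_APInt(C)));
    }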
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
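A hedged usage sketch of the utility above: replace an instruction, then let the helper delete it and any operands that became trivially dead. replaceAndClean is a hypothetical name.

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    // Replace all uses of I with NewV, then delete I (and any now-dead operand
    // chain) if nothing else keeps it alive.
    static void replaceAndClean(Instruction *I, Value *NewV) {
      I->replaceAllUsesWith(NewV);
      RecursivelyDeleteTriviallyDeadInstructions(I);
    }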
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
auto pred_end(const MachineBasicBlock *BB)
APInt operator*(APInt a, uint64_t RHS)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
void sort(IteratorTy Start, IteratorTy End)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
bool VerifyLoopInfo
Enable verification of loop info.
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
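A source-level analogue of the bypass, under the assumption that the target reports 64-bit division as slow and 32-bit division as fast; bypassedUDiv is an illustrative name.

    #include <cstdint>

    // If both operands fit in 32 bits, take the cheaper narrow divide;
    // otherwise fall back to the full-width operation.
    uint64_t bypassedUDiv(uint64_t A, uint64_t B) {
      if ((A >> 32) == 0 && (B >> 32) == 0)
        return uint64_t(uint32_t(A) / uint32_t(B));
      return A / B;
    }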
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
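A brief usage sketch of SplitBlockAndInsertIfThen as declared above; guardWithCondition is a hypothetical helper.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    // Split the block before I and branch on Cond; the returned terminator ends
    // the new "then" block, ready to receive guarded code.
    static void guardWithCondition(Instruction *I, Value *Cond) {
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(
          Cond, I->getIterator(), /*Unreachable=*/false);
      IRBuilder<> Builder(ThenTerm); // insert guarded instructions here
      (void)Builder;                 // sketch only
    }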
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.