LLVM 20.0.0git
CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/CodeGenPrepare.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/ADT/PointerIntPair.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/Analysis/BlockFrequencyInfo.h"
26#include "llvm/Analysis/BranchProbabilityInfo.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/ProfileSummaryInfo.h"
30#include "llvm/Analysis/ScalarEvolutionExpressions.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/CodeGen/Analysis.h"
36#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/SelectionDAGNodes.h"
39#include "llvm/CodeGen/TargetLowering.h"
40#include "llvm/CodeGen/TargetPassConfig.h"
41#include "llvm/CodeGen/TargetSubtargetInfo.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/CodeGenTypes/MachineValueType.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugInfo.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Dominators.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/GlobalVariable.h"
58#include "llvm/IR/IRBuilder.h"
59#include "llvm/IR/InlineAsm.h"
60#include "llvm/IR/InstrTypes.h"
61#include "llvm/IR/Instruction.h"
62#include "llvm/IR/Instructions.h"
63#include "llvm/IR/IntrinsicInst.h"
64#include "llvm/IR/Intrinsics.h"
65#include "llvm/IR/IntrinsicsAArch64.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/MDBuilder.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/PatternMatch.h"
71#include "llvm/IR/ProfDataUtils.h"
72#include "llvm/IR/Statepoint.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/User.h"
76#include "llvm/IR/Value.h"
77#include "llvm/IR/ValueHandle.h"
78#include "llvm/IR/ValueMap.h"
79#include "llvm/InitializePasses.h"
80#include "llvm/Pass.h"
81#include "llvm/Support/BlockFrequency.h"
82#include "llvm/Support/BranchProbability.h"
83#include "llvm/Support/Casting.h"
84#include "llvm/Support/CommandLine.h"
85#include "llvm/Support/Compiler.h"
86#include "llvm/Support/Debug.h"
87#include "llvm/Support/ErrorHandling.h"
88#include "llvm/Support/raw_ostream.h"
89#include "llvm/Target/TargetMachine.h"
90#include "llvm/Target/TargetOptions.h"
91#include "llvm/Transforms/Utils/BasicBlockUtils.h"
92#include "llvm/Transforms/Utils/BypassSlowDivision.h"
93#include "llvm/Transforms/Utils/Local.h"
94#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
95#include "llvm/Transforms/Utils/SizeOpts.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <optional>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107using namespace llvm::PatternMatch;
108
109#define DEBUG_TYPE "codegenprepare"
110
111STATISTIC(NumBlocksElim,"Number of blocks eliminated");
112STATISTIC(NumPHIsElim,"Number of trivial PHIs eliminated");
113STATISTIC(NumGEPsElim,"Number of GEPs converted to casts");
114STATISTIC(NumCmpUses,"Number of uses of Cmp expressions replaced with uses of "
115"sunken Cmps");
116STATISTIC(NumCastUses,"Number of uses of Cast expressions replaced with uses "
117"of sunken Casts");
118STATISTIC(NumMemoryInsts,"Number of memory instructions whose address "
119"computations were sunk");
120STATISTIC(NumMemoryInstsPhiCreated,
121"Number of phis created when address "
122"computations were sunk to memory instructions");
123STATISTIC(NumMemoryInstsSelectCreated,
124"Number of select created when address "
125"computations were sunk to memory instructions");
126STATISTIC(NumExtsMoved,"Number of [s|z]ext instructions combined with loads");
127STATISTIC(NumExtUses,"Number of uses of [s|z]ext instructions optimized");
128STATISTIC(NumAndsAdded,
129"Number of and mask instructions added to form ext loads");
130STATISTIC(NumAndUses,"Number of uses of and mask instructions optimized");
131STATISTIC(NumRetsDup,"Number of return instructions duplicated");
132STATISTIC(NumDbgValueMoved,"Number of debug value instructions moved");
133STATISTIC(NumSelectsExpanded,"Number of selects turned into branches");
134STATISTIC(NumStoreExtractExposed,"Number of store(extractelement) exposed");
135
136staticcl::opt<bool>DisableBranchOpts(
137"disable-cgp-branch-opts",cl::Hidden,cl::init(false),
138cl::desc("Disable branch optimizations in CodeGenPrepare"));
139
140staticcl::opt<bool>
141DisableGCOpts("disable-cgp-gc-opts",cl::Hidden,cl::init(false),
142cl::desc("Disable GC optimizations in CodeGenPrepare"));
143
144staticcl::opt<bool>
145DisableSelectToBranch("disable-cgp-select2branch",cl::Hidden,
146cl::init(false),
147cl::desc("Disable select to branch conversion."));
148
149staticcl::opt<bool>
150AddrSinkUsingGEPs("addr-sink-using-gep",cl::Hidden,cl::init(true),
151cl::desc("Address sinking in CGP using GEPs."));
152
153staticcl::opt<bool>
154EnableAndCmpSinking("enable-andcmp-sinking",cl::Hidden,cl::init(true),
155cl::desc("Enable sinking and/cmp into branches."));
156
157staticcl::opt<bool>DisableStoreExtract(
158"disable-cgp-store-extract",cl::Hidden,cl::init(false),
159cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
160
161staticcl::opt<bool>StressStoreExtract(
162"stress-cgp-store-extract",cl::Hidden,cl::init(false),
163cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
164
165staticcl::opt<bool>DisableExtLdPromotion(
166"disable-cgp-ext-ld-promotion",cl::Hidden,cl::init(false),
167cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
168"CodeGenPrepare"));
169
170staticcl::opt<bool>StressExtLdPromotion(
171"stress-cgp-ext-ld-promotion",cl::Hidden,cl::init(false),
172cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
173"optimization in CodeGenPrepare"));
174
175staticcl::opt<bool>DisablePreheaderProtect(
176"disable-preheader-prot",cl::Hidden,cl::init(false),
177cl::desc("Disable protection against removing loop preheaders"));
178
179staticcl::opt<bool>ProfileGuidedSectionPrefix(
180"profile-guided-section-prefix",cl::Hidden,cl::init(true),
181cl::desc("Use profile info to add section prefix for hot/cold functions"));
182
183staticcl::opt<bool>ProfileUnknownInSpecialSection(
184"profile-unknown-in-special-section",cl::Hidden,
185cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
186"profile, we cannot tell the function is cold for sure because "
187"it may be a function newly added without ever being sampled. "
188"With the flag enabled, compiler can put such profile unknown "
189"functions into a special section, so runtime system can choose "
190"to handle it in a different way than .text section, to save "
191"RAM for example. "));
192
193staticcl::opt<bool>BBSectionsGuidedSectionPrefix(
194"bbsections-guided-section-prefix",cl::Hidden,cl::init(true),
195cl::desc("Use the basic-block-sections profile to determine the text "
196"section prefix for hot functions. Functions with "
197"basic-block-sections profile will be placed in `.text.hot` "
198"regardless of their FDO profile info. Other functions won't be "
199"impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
200"profiles."));
201
202staticcl::opt<uint64_t>FreqRatioToSkipMerge(
203"cgp-freq-ratio-to-skip-merge",cl::Hidden,cl::init(2),
204cl::desc("Skip merging empty blocks if (frequency of empty block) / "
205"(frequency of destination block) is greater than this ratio"));
206
207staticcl::opt<bool>ForceSplitStore(
208"force-split-store",cl::Hidden,cl::init(false),
209cl::desc("Force store splitting no matter what the target query says."));
210
211staticcl::opt<bool>EnableTypePromotionMerge(
212"cgp-type-promotion-merge",cl::Hidden,
213cl::desc("Enable merging of redundant sexts when one is dominating"
214" the other."),
215cl::init(true));
216
217staticcl::opt<bool>DisableComplexAddrModes(
218"disable-complex-addr-modes",cl::Hidden,cl::init(false),
219cl::desc("Disables combining addressing modes with different parts "
220"in optimizeMemoryInst."));
221
222staticcl::opt<bool>
223AddrSinkNewPhis("addr-sink-new-phis",cl::Hidden,cl::init(false),
224cl::desc("Allow creation of Phis in Address sinking."));
225
226staticcl::opt<bool>AddrSinkNewSelects(
227"addr-sink-new-select",cl::Hidden,cl::init(true),
228cl::desc("Allow creation of selects in Address sinking."));
229
230staticcl::opt<bool>AddrSinkCombineBaseReg(
231"addr-sink-combine-base-reg",cl::Hidden,cl::init(true),
232cl::desc("Allow combining of BaseReg field in Address sinking."));
233
234staticcl::opt<bool>AddrSinkCombineBaseGV(
235"addr-sink-combine-base-gv",cl::Hidden,cl::init(true),
236cl::desc("Allow combining of BaseGV field in Address sinking."));
237
238staticcl::opt<bool>AddrSinkCombineBaseOffs(
239"addr-sink-combine-base-offs",cl::Hidden,cl::init(true),
240cl::desc("Allow combining of BaseOffs field in Address sinking."));
241
242staticcl::opt<bool>AddrSinkCombineScaledReg(
243"addr-sink-combine-scaled-reg",cl::Hidden,cl::init(true),
244cl::desc("Allow combining of ScaledReg field in Address sinking."));
245
246staticcl::opt<bool>
247EnableGEPOffsetSplit("cgp-split-large-offset-gep",cl::Hidden,
248cl::init(true),
249cl::desc("Enable splitting large offset of GEP."));
250
251staticcl::opt<bool>EnableICMP_EQToICMP_ST(
252"cgp-icmp-eq2icmp-st",cl::Hidden,cl::init(false),
253cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
254
255staticcl::opt<bool>
256VerifyBFIUpdates("cgp-verify-bfi-updates",cl::Hidden,cl::init(false),
257cl::desc("Enable BFI update verification for "
258"CodeGenPrepare."));
259
260staticcl::opt<bool>
261OptimizePhiTypes("cgp-optimize-phi-types",cl::Hidden,cl::init(true),
262cl::desc("Enable converting phi types in CodeGenPrepare"));
263
264staticcl::opt<unsigned>
265HugeFuncThresholdInCGPP("cgpp-huge-func",cl::init(10000),cl::Hidden,
266cl::desc("Least BB number of huge function."));
267
268staticcl::opt<unsigned>
269MaxAddressUsersToScan("cgp-max-address-users-to-scan",cl::init(100),
270cl::Hidden,
271cl::desc("Max number of address users to look at"));
272
273staticcl::opt<bool>
274DisableDeletePHIs("disable-cgp-delete-phis",cl::Hidden,cl::init(false),
275cl::desc("Disable elimination of dead PHI nodes."));
276
277namespace{
278
279enum ExtType {
280 ZeroExtension,// Zero extension has been seen.
281 SignExtension,// Sign extension has been seen.
282 BothExtension// This extension type is used if we saw sext after
283// ZeroExtension had been set, or if we saw zext after
284// SignExtension had been set. It makes the type
285// information of a promoted instruction invalid.
286};
287
288enum ModifyDT {
289 NotModifyDT,// Do not modify any dominator tree.
290 ModifyBBDT,// Modify the basic block dominator tree.
291 ModifyInstDT// Modify the instruction dominators within a basic block.
292// This usually means we move/delete/insert an instruction
293// in a basic block, so we should re-iterate the
294// instructions in such a block.
295};
296
297usingSetOfInstrs =SmallPtrSet<Instruction *, 16>;
298usingTypeIsSExt =PointerIntPair<Type *, 2, ExtType>;
299usingInstrToOrigTy =DenseMap<Instruction *, TypeIsSExt>;
300usingSExts =SmallVector<Instruction *, 16>;
301usingValueToSExts =MapVector<Value *, SExts>;
302
303classTypePromotionTransaction;
304
305classCodeGenPrepare {
306friendclassCodeGenPrepareLegacyPass;
307constTargetMachine *TM =nullptr;
308constTargetSubtargetInfo *SubtargetInfo =nullptr;
309constTargetLowering *TLI =nullptr;
310constTargetRegisterInfo *TRI =nullptr;
311constTargetTransformInfo *TTI =nullptr;
312constBasicBlockSectionsProfileReader *BBSectionsProfileReader =nullptr;
313constTargetLibraryInfo *TLInfo =nullptr;
314LoopInfo *LI =nullptr;
315 std::unique_ptr<BlockFrequencyInfo>BFI;
316 std::unique_ptr<BranchProbabilityInfo> BPI;
317ProfileSummaryInfo *PSI =nullptr;
318
319 /// As we scan instructions optimizing them, this is the next instruction
320 /// to optimize. Transforms that can invalidate this should update it.
321BasicBlock::iterator CurInstIterator;
322
323 /// Keeps track of non-local addresses that have been sunk into a block.
324 /// This allows us to avoid inserting duplicate code for blocks with
325 /// multiple load/stores of the same address. The usage of WeakTrackingVH
326 /// enables SunkAddrs to be treated as a cache whose entries can be
327 /// invalidated if a sunken address computation has been erased.
328ValueMap<Value *, WeakTrackingVH> SunkAddrs;
329
330 /// Keeps track of all instructions inserted for the current function.
331 SetOfInstrs InsertedInsts;
332
333 /// Keeps track of the types of the related instructions before their
334 /// promotion for the current function.
335 InstrToOrigTy PromotedInsts;
336
337 /// Keep track of instructions removed during promotion.
338 SetOfInstrs RemovedInsts;
339
340 /// Keep track of sext chains based on their initial value.
341DenseMap<Value *, Instruction *> SeenChainsForSExt;
342
343 /// Keep track of GEPs accessing the same data structures such as structs or
344 /// arrays that are candidates to be split later because of their large
345 /// size.
346MapVector<AssertingVH<Value>,
347SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
348 LargeOffsetGEPMap;
349
350 /// Keep track of new GEP base after splitting the GEPs having large offset.
351SmallSet<AssertingVH<Value>, 2> NewGEPBases;
352
353 /// Map serial numbers to Large offset GEPs.
354DenseMap<AssertingVH<GetElementPtrInst>,int> LargeOffsetGEPID;
355
356 /// Keep track of SExt promoted.
357 ValueToSExts ValToSExtendedUses;
358
359 /// True if the function has the OptSize attribute.
360bool OptSize;
361
362 /// DataLayout for the Function being processed.
363constDataLayout *DL =nullptr;
364
365 /// Building the dominator tree can be expensive, so we only build it
366 /// lazily and update it when required.
367 std::unique_ptr<DominatorTree> DT;
368
369public:
370 CodeGenPrepare(){};
371 CodeGenPrepare(constTargetMachine *TM) :TM(TM){};
372 /// If we encounter a huge function, we need to limit the build time.
373bool IsHugeFunc =false;
374
375 /// FreshBBs is like a worklist: it collects the updated BBs that need
376 /// to be optimized again.
377 /// Note: to keep the build time of this pass in check, when a BB is
378 /// updated we insert it into FreshBBs for huge functions.
379SmallSet<BasicBlock *, 32> FreshBBs;
380
381void releaseMemory() {
382// Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 BPI.reset();
387BFI.reset();
388 }
389
390boolrun(Function &F,FunctionAnalysisManager &AM);
391
392private:
393template <typename F>
394void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB,F f) {
395// Substituting can cause recursive simplifications, which can invalidate
396// our iterator. Use a WeakTrackingVH to hold onto it in case this
397// happens.
398Value *CurValue = &*CurInstIterator;
399WeakTrackingVH IterHandle(CurValue);
400
401f();
402
403// If the iterator instruction was recursively deleted, start over at the
404// start of the block.
405if (IterHandle != CurValue) {
406 CurInstIterator = BB->begin();
407 SunkAddrs.clear();
408 }
409 }
410
411// Get the DominatorTree, building if necessary.
412DominatorTree &getDT(Function &F) {
413if (!DT)
414 DT = std::make_unique<DominatorTree>(F);
415return *DT;
416 }
417
418void removeAllAssertingVHReferences(Value *V);
419bool eliminateAssumptions(Function &F);
420bool eliminateFallThrough(Function &F,DominatorTree *DT =nullptr);
421bool eliminateMostlyEmptyBlocks(Function &F);
422BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
423bool canMergeBlocks(constBasicBlock *BB,constBasicBlock *DestBB)const;
424void eliminateMostlyEmptyBlock(BasicBlock *BB);
425bool isMergingEmptyBlockProfitable(BasicBlock *BB,BasicBlock *DestBB,
426bool isPreheader);
427bool makeBitReverse(Instruction &I);
428booloptimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
429bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
430bool optimizeMemoryInst(Instruction *MemoryInst,Value *Addr,Type *AccessTy,
431unsigned AddrSpace);
432bool optimizeGatherScatterInst(Instruction *MemoryInst,Value *Ptr);
433bool optimizeInlineAsmInst(CallInst *CS);
434booloptimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
435bool optimizeExt(Instruction *&I);
436bool optimizeExtUses(Instruction *I);
437bool optimizeLoadExt(LoadInst *Load);
438bool optimizeShiftInst(BinaryOperator *BO);
439bool optimizeFunnelShift(IntrinsicInst *Fsh);
440bool optimizeSelectInst(SelectInst *SI);
441bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
442bool optimizeSwitchType(SwitchInst *SI);
443bool optimizeSwitchPhiConstants(SwitchInst *SI);
444bool optimizeSwitchInst(SwitchInst *SI);
445bool optimizeExtractElementInst(Instruction *Inst);
446bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
447bool fixupDbgValue(Instruction *I);
448bool fixupDbgVariableRecord(DbgVariableRecord &I);
449bool fixupDbgVariableRecordsOnInst(Instruction &I);
450bool placeDbgValues(Function &F);
451bool placePseudoProbes(Function &F);
452bool canFormExtLd(constSmallVectorImpl<Instruction *> &MovedExts,
453LoadInst *&LI,Instruction *&Inst,bool HasPromoted);
454bool tryToPromoteExts(TypePromotionTransaction &TPT,
455constSmallVectorImpl<Instruction *> &Exts,
456SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457unsigned CreatedInstsCost = 0);
458bool mergeSExts(Function &F);
459bool splitLargeGEPOffsets();
460bool optimizePhiType(PHINode *Inst,SmallPtrSetImpl<PHINode *> &Visited,
461SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462bool optimizePhiTypes(Function &F);
463bool performAddressTypePromotion(
464Instruction *&Inst,bool AllowPromotionWithoutCommonHeader,
465bool HasPromoted, TypePromotionTransaction &TPT,
466SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470bool tryToSinkFreeOperands(Instruction *I);
471bool replaceMathCmpWithIntrinsic(BinaryOperator *BO,Value *Arg0,Value *Arg1,
472CmpInst *Cmp,Intrinsic::ID IID);
473bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474bool optimizeURem(Instruction *Rem);
475bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477void verifyBFIUpdates(Function &F);
478bool _run(Function &F);
479};
480
481classCodeGenPrepareLegacyPass :publicFunctionPass {
482public:
483staticcharID;// Pass identification, replacement for typeid
484
485 CodeGenPrepareLegacyPass() :FunctionPass(ID) {
486initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
487 }
488
489boolrunOnFunction(Function &F)override;
490
491StringRefgetPassName() const override{return"CodeGen Prepare"; }
492
493voidgetAnalysisUsage(AnalysisUsage &AU) const override{
494// FIXME: When we can selectively preserve passes, preserve the domtree.
495 AU.addRequired<ProfileSummaryInfoWrapperPass>();
496 AU.addRequired<TargetLibraryInfoWrapperPass>();
497 AU.addRequired<TargetPassConfig>();
498 AU.addRequired<TargetTransformInfoWrapperPass>();
499 AU.addRequired<LoopInfoWrapperPass>();
500 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
501 }
502};
503
504}// end anonymous namespace
505
506char CodeGenPrepareLegacyPass::ID = 0;
507
508bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
509if (skipFunction(F))
510returnfalse;
511autoTM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
512 CodeGenPrepare CGP(TM);
513 CGP.DL = &F.getDataLayout();
514 CGP.SubtargetInfo =TM->getSubtargetImpl(F);
515 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
516 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
517 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
518 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
519 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
520 CGP.BPI.reset(newBranchProbabilityInfo(F, *CGP.LI));
521 CGP.BFI.reset(newBlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
522 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
523auto BBSPRWP =
524 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
525 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() :nullptr;
526
527return CGP._run(F);
528}
529
530INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass,DEBUG_TYPE,
531"Optimize for code generation",false,false)
532INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
533INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
534INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
535INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
536INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
537INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
538INITIALIZE_PASS_END(CodeGenPrepareLegacyPass,DEBUG_TYPE,
539 "Optimizefor codegeneration",false,false)
540
541FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
542returnnew CodeGenPrepareLegacyPass();
543}
544
545PreservedAnalysesCodeGenPreparePass::run(Function &F,
546FunctionAnalysisManager &AM) {
547 CodeGenPrepare CGP(TM);
548
549bool Changed = CGP.run(F, AM);
550if (!Changed)
551returnPreservedAnalyses::all();
552
553PreservedAnalyses PA;
554 PA.preserve<TargetLibraryAnalysis>();
555 PA.preserve<TargetIRAnalysis>();
556 PA.preserve<LoopAnalysis>();
557return PA;
558}
559
560bool CodeGenPrepare::run(Function &F,FunctionAnalysisManager &AM) {
561DL = &F.getDataLayout();
562 SubtargetInfo = TM->getSubtargetImpl(F);
563 TLI = SubtargetInfo->getTargetLowering();
564TRI = SubtargetInfo->getRegisterInfo();
565 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
566TTI = &AM.getResult<TargetIRAnalysis>(F);
567 LI = &AM.getResult<LoopAnalysis>(F);
568 BPI.reset(newBranchProbabilityInfo(F, *LI));
569 BFI.reset(newBlockFrequencyInfo(F, *BPI, *LI));
570auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
571 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
572 BBSectionsProfileReader =
573 AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
574return _run(F);
575}
576
577bool CodeGenPrepare::_run(Function &F) {
578bool EverMadeChange =false;
579
580 OptSize =F.hasOptSize();
581// Use the basic-block-sections profile to promote hot functions to .text.hot
582// if requested.
583if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
584 BBSectionsProfileReader->isFunctionHot(F.getName())) {
585F.setSectionPrefix("hot");
586 }elseif (ProfileGuidedSectionPrefix) {
587// The hot attribute overrides profile-count-based hotness, while
588// profile-count-based hotness overrides the cold attribute.
589// This is conservative behavior.
590if (F.hasFnAttribute(Attribute::Hot) ||
591 PSI->isFunctionHotInCallGraph(&F, *BFI))
592F.setSectionPrefix("hot");
593// If PSI shows this function is not hot, we place the function into the
594// unlikely section if (1) PSI shows this is a cold function, or
595// (2) the function has the cold attribute.
596elseif (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
597F.hasFnAttribute(Attribute::Cold))
598F.setSectionPrefix("unlikely");
599elseif (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
600 PSI->isFunctionHotnessUnknown(F))
601F.setSectionPrefix("unknown");
602 }
603
604 /// This optimization identifies DIV instructions that can be
605 /// profitably bypassed and carried out with a shorter, faster divide.
606if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
607constDenseMap<unsigned int, unsigned int> &BypassWidths =
608 TLI->getBypassSlowDivWidths();
609BasicBlock *BB = &*F.begin();
610while (BB !=nullptr) {
611// bypassSlowDivision may create new BBs, but we don't want to reapply the
612// optimization to those blocks.
613BasicBlock *Next = BB->getNextNode();
614if (!llvm::shouldOptimizeForSize(BB, PSI,BFI.get()))
615 EverMadeChange |=bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620// Get rid of @llvm.assume builtins before attempting to eliminate empty
621// blocks, since there might be blocks that only contain @llvm.assume calls
622// (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625// Eliminate blocks that contain only PHI nodes and an
626// unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
630if (!DisableBranchOpts)
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633// Split some critical edges where one of the sources is an indirect branch,
634// to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636SplitIndirectBrCriticalEdges(F,/*IgnoreBlocksWithoutPHI=*/true);
637
638// If we are optimizing a huge function, we need to consider the build
639// time, because the basic algorithm's complexity is close to O(N!).
640 IsHugeFunc =F.size() >HugeFuncThresholdInCGPP;
641
642// Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647bool MadeChange =true;
648bool FuncIterated =false;
649while (MadeChange) {
650 MadeChange =false;
651
652for (BasicBlock &BB :llvm::make_early_inc_range(F)) {
653if (FuncIterated && !FreshBBs.contains(&BB))
654continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657bool Changed =optimizeBlock(BB, ModifiedDTOnIteration);
658
659if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663if (IsHugeFunc) {
664// If the BB is updated, it may still have a chance to be optimized.
665// This usually happens during sink optimization.
666// For example:
667//
668// bb0:
669// %and = and i32 %a, 4
670// %cmp = icmp eq i32 %and, 0
671//
672// If %cmp is sunk to another BB, %and will then have a chance to sink.
673if (Changed)
674 FreshBBs.insert(&BB);
675elseif (FuncIterated)
676 FreshBBs.erase(&BB);
677 }else {
678// For small/normal functions, we restart BB iteration if the dominator
679// tree of the Function was changed.
680if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681break;
682 }
683 }
684// We have iterated over all the BBs in the function (this only applies to huge functions).
685 FuncIterated = IsHugeFunc;
686
687if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697if (MadeChange &&VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701// Really free removed instructions during promotion.
702for (Instruction *I : RemovedInsts)
703I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716if (!DisableBranchOpts) {
717 MadeChange =false;
718// Use a set vector to get deterministic iteration order. The order the
719// blocks are removed may affect whether or not PHI nodes in successors
720// are removed.
721SmallSetVector<BasicBlock *, 8> WorkList;
722for (BasicBlock &BB :F) {
723SmallVector<BasicBlock *, 2> Successors(successors(&BB));
724 MadeChange |=ConstantFoldTerminator(&BB,true);
725if (!MadeChange)
726continue;
727
728for (BasicBlock *Succ : Successors)
729if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733// Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735while (!WorkList.empty()) {
736BasicBlock *BB = WorkList.pop_back_val();
737SmallVector<BasicBlock *, 2> Successors(successors(BB));
738
739DeleteDeadBlock(BB);
740
741for (BasicBlock *Succ : Successors)
742if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746// Merge pairs of basic blocks with unconditional branches, connected by
747// a single edge.
748if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754if (!DisableGCOpts) {
755SmallVector<GCStatepointInst *, 2> Statepoints;
756for (BasicBlock &BB :F)
757for (Instruction &I : BB)
758if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764// Do this last to clean up use-before-def scenarios introduced by other
765// preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
770if (VerifyBFIUpdates)
771 verifyBFIUpdates(F);
772#endif
773
774return EverMadeChange;
775}
776
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778bool MadeChange =false;
779for (BasicBlock &BB :F) {
780 CurInstIterator = BB.begin();
781while (CurInstIterator != BB.end()) {
782Instruction *I = &*(CurInstIterator++);
783if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange =true;
785Value *Operand =Assume->getOperand(0);
786Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo,nullptr);
790 });
791 }
792 }
793 }
794return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803autoGEP = dyn_cast<GetElementPtrInst>(V);
804if (!GEP)
805return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810if (VecI == LargeOffsetGEPMap.end())
811return;
812
813auto &GEPVector = VecI->second;
814llvm::erase_if(GEPVector, [=](auto &Elt) {return Elt.first ==GEP; });
815
816if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821voidLLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
822DominatorTree NewDT(F);
823LoopInfo NewLI(NewDT);
824BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
832bool CodeGenPrepare::eliminateFallThrough(Function &F,DominatorTree *DT) {
833bool Changed =false;
834// Scan all of the blocks in the function, except for the entry block.
835// Use a temporary array to avoid iterator being invalidated when
836// deleting blocks.
837SmallVector<WeakTrackingVH, 16>Blocks;
838for (auto &Block :llvm::drop_begin(F))
839Blocks.push_back(&Block);
840
841SmallSet<WeakTrackingVH, 16> Preds;
842for (auto &Block :Blocks) {
843auto *BB = cast_or_null<BasicBlock>(Block);
844if (!BB)
845continue;
846// If the destination block has a single pred, then this is a trivial
847// edge, just collapse it.
848BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850// Don't merge if BB's address is taken.
851if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852continue;
853
854// Make an effort to skip unreachable blocks.
855if (DT && !DT->isReachableFromEntry(BB))
856continue;
857
858BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859if (Term && !Term->isConditional()) {
860 Changed =true;
861LLVM_DEBUG(dbgs() <<"To merge:\n" << *BB <<"\n\n\n");
862
863// Merge BB into SinglePred and delete it.
864MergeBlockIntoPredecessor(BB,/* DTU */nullptr, LI,/* MSSAU */nullptr,
865/* MemDep */nullptr,
866/* PredecessorWithTwoSuccessors */false, DT);
867 Preds.insert(SinglePred);
868
869if (IsHugeFunc) {
870// Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877// (Repeatedly) merging blocks into their predecessors can create redundant
878// debug intrinsics.
879for (constauto &Pred : Preds)
880if (auto *BB = cast_or_null<BasicBlock>(Pred))
881RemoveRedundantDbgInstrs(BB);
882
883return Changed;
884}
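
// Illustrative annotation (assumed IR sketch, not part of the upstream file):
// a trivial fall-through edge that eliminateFallThrough merges; %bb has the
// unconditional branch in %pred as its only incoming edge:
//   pred:                            pred:
//     br label %bb             =>      <body of %bb>
//   bb:                              ; %bb is deleted
//     <body of %bb>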
885
886/// Find a destination block from BB if BB is a mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888// If this block doesn't end with an uncond branch, ignore it.
889BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890if (!BI || !BI->isUnconditional())
891returnnullptr;
892
893// If the instruction before the branch (skipping debug info) isn't a phi
894// node, then other stuff is happening here.
895BasicBlock::iterator BBI = BI->getIterator();
896if (BBI != BB->begin()) {
897 --BBI;
898while (isa<DbgInfoIntrinsic>(BBI)) {
899if (BBI == BB->begin())
900break;
901 --BBI;
902 }
903if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
904returnnullptr;
905 }
906
907// Do not break infinite loops.
908BasicBlock *DestBB = BI->getSuccessor(0);
909if (DestBB == BB)
910returnnullptr;
911
912if (!canMergeBlocks(BB, DestBB))
913 DestBB =nullptr;
914
915return DestBB;
916}
917
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
920/// edges in ways that are non-optimal for isel. Start by eliminating these
921/// blocks so we can split them the way we want them.
922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
923SmallPtrSet<BasicBlock *, 16> Preheaders;
924SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
925while (!LoopList.empty()) {
926Loop *L = LoopList.pop_back_val();
927llvm::append_range(LoopList, *L);
928if (BasicBlock *Preheader =L->getLoopPreheader())
929 Preheaders.insert(Preheader);
930 }
931
932bool MadeChange =false;
933// Copy blocks into a temporary array to avoid iterator invalidation issues
934// as we remove them.
935// Note that this intentionally skips the entry block.
936SmallVector<WeakTrackingVH, 16>Blocks;
937for (auto &Block :llvm::drop_begin(F)) {
938// Delete phi nodes that could block deleting other empty blocks.
939if (!DisableDeletePHIs)
940 MadeChange |=DeleteDeadPHIs(&Block, TLInfo);
941Blocks.push_back(&Block);
942 }
943
944for (auto &Block :Blocks) {
945BasicBlock *BB = cast_or_null<BasicBlock>(Block);
946if (!BB)
947continue;
948BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
949if (!DestBB ||
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
951continue;
952
953 eliminateMostlyEmptyBlock(BB);
954 MadeChange =true;
955 }
956return MadeChange;
957}
958
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
960BasicBlock *DestBB,
961bool isPreheader) {
962// Do not delete loop preheaders if doing so would create a critical edge.
963// Loop preheaders can be good locations to spill registers. If the
964// preheader is deleted and we create a critical edge, registers may be
965// spilled in the loop body instead.
966if (!DisablePreheaderProtect && isPreheader &&
967 !(BB->getSinglePredecessor() &&
968 BB->getSinglePredecessor()->getSingleSuccessor()))
969returnfalse;
970
971// Skip merging if the block's successor is also a successor to any callbr
972// that leads to this block.
973// FIXME: Is this really needed? Is this a correctness issue?
974for (BasicBlock *Pred :predecessors(BB)) {
975if (isa<CallBrInst>(Pred->getTerminator()) &&
976llvm::is_contained(successors(Pred), DestBB))
977returnfalse;
978 }
979
980// Try to skip merging if the unique predecessor of BB is terminated by a
981// switch or indirect branch instruction, and BB is used as an incoming block
982// of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
983// to add COPY instructions in the predecessor of BB instead of BB (if it is
984// not merged). Note that the critical edge created by merging such blocks
985// won't be split in MachineSink because the jump table is not analyzable. By
986// keeping such an empty block (BB), ISel will place COPY instructions in BB,
987// not in the predecessor of BB.
988BasicBlock *Pred = BB->getUniquePredecessor();
989if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
990 isa<IndirectBrInst>(Pred->getTerminator())))
991returntrue;
992
993if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
994returntrue;
995
996// We use a simple cost heuristic which determines that skipping merging is
997// profitable if the cost of skipping merging is less than the cost of
998// merging: Cost(skipping merging) < Cost(merging BB), where the
999// Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1000// the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1001// Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1002// Freq(Pred) / Freq(BB) > 2.
1003// Note that if there are multiple empty blocks sharing the same incoming
1004// value for the PHIs in the DestBB, we consider them together. In such a
1005// case, Cost(merging BB) will be the sum of their frequencies.
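// Illustrative annotation (worked example with assumed numbers, not part of
// the upstream comment): with the default cgp-freq-ratio-to-skip-merge of 2,
// if Freq(BB) = 100 and Freq(Pred) = 250, the limit is 100 * 2 = 200 and
// 250 > 200, so merging is skipped; with Freq(Pred) = 150, BB would still be
// merged into DestBB.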
1006
1007if (!isa<PHINode>(DestBB->begin()))
1008returntrue;
1009
1010SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1011
1012// Find all other incoming blocks from which incoming values of all PHIs in
1013// DestBB are the same as the ones from BB.
1014for (BasicBlock *DestBBPred :predecessors(DestBB)) {
1015if (DestBBPred == BB)
1016continue;
1017
1018if (llvm::all_of(DestBB->phis(), [&](constPHINode &DestPN) {
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1021 }))
1022 SameIncomingValueBBs.insert(DestBBPred);
1023 }
1024
1025// See if all of BB's incoming values are the same as the value from Pred.
1026// In this case, there is no reason to skip merging because COPYs are
1027// expected to be placed in Pred already.
1028if (SameIncomingValueBBs.count(Pred))
1029returntrue;
1030
1031BlockFrequency PredFreq =BFI->getBlockFreq(Pred);
1032BlockFrequencyBBFreq =BFI->getBlockFreq(BB);
1033
1034for (auto *SameValueBB : SameIncomingValueBBs)
1035if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1037BBFreq +=BFI->getBlockFreq(SameValueBB);
1038
1039 std::optional<BlockFrequency> Limit =BBFreq.mul(FreqRatioToSkipMerge);
1040return !Limit || PredFreq <= *Limit;
1041}
1042
1043/// Return true if we can merge BB into DestBB if there is a single
1044/// unconditional branch between them, and BB contains no other non-phi
1045/// instructions.
1046bool CodeGenPrepare::canMergeBlocks(constBasicBlock *BB,
1047constBasicBlock *DestBB) const{
1048// We only want to eliminate blocks whose phi nodes are used by phi nodes in
1049// the successor. If there are more complex conditions (e.g. preheaders),
1050// don't mess around with them.
1051for (constPHINode &PN : BB->phis()) {
1052for (constUser *U : PN.users()) {
1053constInstruction *UI = cast<Instruction>(U);
1054if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1055returnfalse;
1056// If User is inside DestBB and is a PHINode, then check the incoming
1057// value. If the incoming value is not from BB, then this is a complex
1058// condition (e.g. preheaders) we want to avoid here.
1059if (UI->getParent() == DestBB) {
1060if (constPHINode *UPN = dyn_cast<PHINode>(UI))
1061for (unsignedI = 0, E = UPN->getNumIncomingValues();I != E; ++I) {
1062Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1063if (Insn &&Insn->getParent() == BB &&
1064Insn->getParent() != UPN->getIncomingBlock(I))
1065returnfalse;
1066 }
1067 }
1068 }
1069 }
1070
1071// If BB and DestBB contain any common predecessors, then the phi nodes in BB
1072// and DestBB may have conflicting incoming values for the block. If so, we
1073// can't merge the block.
1074constPHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1075if (!DestBBPN)
1076returntrue;// no conflict.
1077
1078// Collect the preds of BB.
1079SmallPtrSet<const BasicBlock *, 16> BBPreds;
1080if (constPHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1081// It is faster to get preds from a PHI than with pred_iterator.
1082for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.insert(BBPN->getIncomingBlock(i));
1084 }else {
1085 BBPreds.insert(pred_begin(BB),pred_end(BB));
1086 }
1087
1088// Walk the preds of DestBB.
1089for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1090BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1091if (BBPreds.count(Pred)) {// Common predecessor?
1092for (constPHINode &PN : DestBB->phis()) {
1093constValue *V1 = PN.getIncomingValueForBlock(Pred);
1094constValue *V2 = PN.getIncomingValueForBlock(BB);
1095
1096// If V2 is a phi node in BB, look up what the mapped value will be.
1097if (constPHINode *V2PN = dyn_cast<PHINode>(V2))
1098if (V2PN->getParent() == BB)
1099V2 = V2PN->getIncomingValueForBlock(Pred);
1100
1101// If there is a conflict, bail out.
1102if (V1 != V2)
1103returnfalse;
1104 }
1105 }
1106 }
1107
1108returntrue;
1109}
1110
1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1112staticvoidreplaceAllUsesWith(Value *Old,Value *New,
1113SmallSet<BasicBlock *, 32> &FreshBBs,
1114bool IsHuge) {
1115auto *OldI = dyn_cast<Instruction>(Old);
1116if (OldI) {
1117for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1118 UI != E; ++UI) {
1119Instruction *User = cast<Instruction>(*UI);
1120if (IsHuge)
1121 FreshBBs.insert(User->getParent());
1122 }
1123 }
1124 Old->replaceAllUsesWith(New);
1125}
1126
1127/// Eliminate a basic block that has only phi's and an unconditional branch in
1128/// it.
1129void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1130BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1131BasicBlock *DestBB = BI->getSuccessor(0);
1132
1133LLVM_DEBUG(dbgs() <<"MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1134 << *BB << *DestBB);
1135
1136// If the destination block has a single pred, then this is a trivial edge,
1137// just collapse it.
1138if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1139if (SinglePred != DestBB) {
1140assert(SinglePred == BB &&
1141"Single predecessor not the same as predecessor");
1142// Merge DestBB into SinglePred/BB and delete it.
1143MergeBlockIntoPredecessor(DestBB);
1144// Note: BB(=SinglePred) will not be deleted on this path.
1145// DestBB(=its single successor) is the one that was deleted.
1146LLVM_DEBUG(dbgs() <<"AFTER:\n" << *SinglePred <<"\n\n\n");
1147
1148if (IsHugeFunc) {
1149// Update FreshBBs to optimize the merged BB.
1150 FreshBBs.insert(SinglePred);
1151 FreshBBs.erase(DestBB);
1152 }
1153return;
1154 }
1155 }
1156
1157// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1158// to handle the new incoming edges it is about to have.
1159for (PHINode &PN : DestBB->phis()) {
1160// Remove the incoming value for BB, and remember it.
1161Value *InVal = PN.removeIncomingValue(BB,false);
1162
1163// Two options: either the InVal is a phi node defined in BB or it is some
1164// value that dominates BB.
1165PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166if (InValPhi && InValPhi->getParent() == BB) {
1167// Add all of the input values of the input PHI as inputs of this phi.
1168for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InValPhi->getIncomingValue(i),
1170 InValPhi->getIncomingBlock(i));
1171 }else {
1172// Otherwise, add one instance of the dominating value for each edge that
1173// we will be adding.
1174if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1175for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1177 }else {
1178for (BasicBlock *Pred :predecessors(BB))
1179 PN.addIncoming(InVal, Pred);
1180 }
1181 }
1182 }
1183
1184// Preserve loop Metadata.
1185if (BI->hasMetadata(LLVMContext::MD_loop)) {
1186for (auto *Pred :predecessors(BB))
1187 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1188 }
1189
1190// The PHIs are now updated, change everything that refers to BB to use
1191// DestBB and remove BB.
1192 BB->replaceAllUsesWith(DestBB);
1193 BB->eraseFromParent();
1194 ++NumBlocksElim;
1195
1196LLVM_DEBUG(dbgs() <<"AFTER:\n" << *DestBB <<"\n\n\n");
1197}
1198
1199// Computes a map of base pointer relocation instructions to corresponding
1200// derived pointer relocation instructions given a vector of all relocate calls
1201staticvoidcomputeBaseDerivedRelocateMap(
1202constSmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1203MapVector<GCRelocateInst *,SmallVector<GCRelocateInst *, 0>>
1204 &RelocateInstMap) {
1205// Collect information in two maps: one primarily for locating the base object
1206// while filling the second map; the second map is the final structure holding
1207// a mapping between Base and corresponding Derived relocate calls
1208MapVector<std::pair<unsigned, unsigned>,GCRelocateInst *> RelocateIdxMap;
1209for (auto *ThisRelocate : AllRelocateCalls) {
1210auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1211 ThisRelocate->getDerivedPtrIndex());
1212 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1213 }
1214for (auto &Item : RelocateIdxMap) {
1215 std::pair<unsigned, unsigned> Key = Item.first;
1216if (Key.first == Key.second)
1217// Base relocation: nothing to insert
1218continue;
1219
1220GCRelocateInst *I = Item.second;
1221auto BaseKey = std::make_pair(Key.first, Key.first);
1222
1223// We're iterating over RelocateIdxMap so we cannot modify it.
1224auto MaybeBase = RelocateIdxMap.find(BaseKey);
1225if (MaybeBase == RelocateIdxMap.end())
1226// TODO: We might want to insert a new base object relocate and gep off
1227// that, if there are enough derived object relocates.
1228continue;
1229
1230 RelocateInstMap[MaybeBase->second].push_back(I);
1231 }
1232}
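
// Illustrative annotation (assumed indices, not part of the upstream file):
// for relocates with (BasePtrIndex, DerivedPtrIndex) pairs (4, 4) and (4, 5),
// the (4, 4) call is the base relocation, and RelocateInstMap ends up as
// { relocate(4, 4) -> [ relocate(4, 5) ] }.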
1233
1234// Accepts a GEP and extracts the operands into a vector provided they're all
1235// small integer constants
1236staticboolgetGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1237SmallVectorImpl<Value *> &OffsetV) {
1238for (unsigned i = 1; i <GEP->getNumOperands(); i++) {
1239// Only accept small constant integer operands
1240auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1241if (!Op ||Op->getZExtValue() > 20)
1242returnfalse;
1243 }
1244
1245for (unsigned i = 1; i <GEP->getNumOperands(); i++)
1246 OffsetV.push_back(GEP->getOperand(i));
1247returntrue;
1248}
1249
1250// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1251// replace, computes a replacement, and applies it.
1252staticbool
1253simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1254constSmallVectorImpl<GCRelocateInst *> &Targets) {
1255bool MadeChange =false;
1256// We must ensure that the relocation of a derived pointer is defined after
1257// the relocation of the base pointer. If we find a relocation that shares
1258// this base and is defined earlier than the relocation of the base, we move
1259// the relocation of the base right before the found relocation. We consider
1260// only relocations in the same basic block as the relocation of the base;
1261// relocations from other basic blocks are skipped and we do not care about them.
1262for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1263 &*R != RelocatedBase; ++R)
1264if (auto *RI = dyn_cast<GCRelocateInst>(R))
1265if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1266if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1267 RelocatedBase->moveBefore(RI->getIterator());
1268 MadeChange =true;
1269break;
1270 }
1271
1272for (GCRelocateInst *ToReplace : Targets) {
1273assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1274"Not relocating a derived object of the original base object");
1275if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1276// A duplicate relocate call. TODO: coalesce duplicates.
1277continue;
1278 }
1279
1280if (RelocatedBase->getParent() != ToReplace->getParent()) {
1281// Base and derived relocates are in different basic blocks.
1282// In this case transform is only valid when base dominates derived
1283// relocate. However it would be too expensive to check dominance
1284// for each such relocate, so we skip the whole transformation.
1285continue;
1286 }
1287
1288Value *Base = ToReplace->getBasePtr();
1289auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1290if (!Derived || Derived->getPointerOperand() !=Base)
1291continue;
1292
1293SmallVector<Value *, 2> OffsetV;
1294if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1295continue;
1296
1297// Create a Builder and replace the target callsite with a gep
1298assert(RelocatedBase->getNextNode() &&
1299"Should always have one since it's not a terminator");
1300
1301// Insert after RelocatedBase
1302IRBuilder<> Builder(RelocatedBase->getNextNode());
1303 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1304
1305// If gc_relocate does not match the actual type, cast it to the right type.
1306// In theory, there must be a bitcast after gc_relocate if the type does not
1307// match, and we should reuse it to get the derived pointer. But there could
1308// be cases like this:
1309// bb1:
1310// ...
1311// %g1 = call coldcc i8 addrspace(1)*
1312// @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313//
1314// bb2:
1315// ...
1316// %g2 = call coldcc i8 addrspace(1)*
1317// @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1318//
1319// merge:
1320// %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1321// %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1322//
1323// In this case, we can no longer find the bitcast. So we insert a new
1324// bitcast whether one already exists or not. In this way, we can handle
1325// all cases, and the extra bitcast should be optimized away in later
1326// passes.
1327Value *ActualRelocatedBase = RelocatedBase;
1328if (RelocatedBase->getType() !=Base->getType()) {
1329 ActualRelocatedBase =
1330 Builder.CreateBitCast(RelocatedBase,Base->getType());
1331 }
1332Value *Replacement =
1333 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1334ArrayRef(OffsetV));
1335 Replacement->takeName(ToReplace);
1336// If the newly generated derived pointer's type does not match the original
1337// derived pointer's type, cast the new derived pointer to match it. Same
1338// reasoning as above.
1339Value *ActualReplacement = Replacement;
1340if (Replacement->getType() != ToReplace->getType()) {
1341 ActualReplacement =
1342 Builder.CreateBitCast(Replacement, ToReplace->getType());
1343 }
1344 ToReplace->replaceAllUsesWith(ActualReplacement);
1345 ToReplace->eraseFromParent();
1346
1347 MadeChange =true;
1348 }
1349return MadeChange;
1350}
1351
1352// Turns this:
1353//
1354// %base = ...
1355// %ptr = gep %base + 15
1356// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1357// %base' = relocate(%tok, i32 4, i32 4)
1358// %ptr' = relocate(%tok, i32 4, i32 5)
1359// %val = load %ptr'
1360//
1361// into this:
1362//
1363// %base = ...
1364// %ptr = gep %base + 15
1365// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1366// %base' = gc.relocate(%tok, i32 4, i32 4)
1367// %ptr' = gep %base' + 15
1368// %val = load %ptr'
1369bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1370bool MadeChange =false;
1371SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1372for (auto *U :I.users())
1373if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1374// Collect all the relocate calls associated with a statepoint
1375 AllRelocateCalls.push_back(Relocate);
1376
1377// We need at least one base pointer relocation + one derived pointer
1378// relocation to mangle
1379if (AllRelocateCalls.size() < 2)
1380returnfalse;
1381
1382// RelocateInstMap is a mapping from the base relocate instruction to the
1383// corresponding derived relocate instructions
1384MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1385computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1386if (RelocateInstMap.empty())
1387returnfalse;
1388
1389for (auto &Item : RelocateInstMap)
1390// Item.first is the RelocatedBase to offset against
1391// Item.second is the vector of Targets to replace
1392 MadeChange =simplifyRelocatesOffABase(Item.first, Item.second);
1393return MadeChange;
1394}
1395
1396/// Sink the specified cast instruction into its user blocks.
1397staticboolSinkCast(CastInst *CI) {
1398BasicBlock *DefBB = CI->getParent();
1399
1400 /// InsertedCasts - Only insert a cast in each block once.
1401DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1402
1403bool MadeChange =false;
1404for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1405 UI != E;) {
1406Use &TheUse = UI.getUse();
1407Instruction *User = cast<Instruction>(*UI);
1408
1409// Figure out which BB this cast is used in. For PHI's this is the
1410// appropriate predecessor block.
1411BasicBlock *UserBB =User->getParent();
1412if (PHINode *PN = dyn_cast<PHINode>(User)) {
1413 UserBB = PN->getIncomingBlock(TheUse);
1414 }
1415
1416// Preincrement use iterator so we don't invalidate it.
1417 ++UI;
1418
1419// The first insertion point of a block containing an EH pad is after the
1420// pad. If the pad is the user, we cannot sink the cast past the pad.
1421if (User->isEHPad())
1422continue;
1423
1424// If the block selected to receive the cast is an EH pad that does not
1425// allow non-PHI instructions before the terminator, we can't sink the
1426// cast.
1427if (UserBB->getTerminator()->isEHPad())
1428continue;
1429
1430// If this user is in the same block as the cast, don't change the cast.
1431if (UserBB == DefBB)
1432continue;
1433
1434// If we have already inserted a cast into this block, use it.
1435CastInst *&InsertedCast = InsertedCasts[UserBB];
1436
1437if (!InsertedCast) {
1438BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1439assert(InsertPt != UserBB->end());
1440 InsertedCast = cast<CastInst>(CI->clone());
1441 InsertedCast->insertBefore(*UserBB, InsertPt);
1442 }
1443
1444// Replace a use of the cast with a use of the new cast.
1445 TheUse = InsertedCast;
1446 MadeChange =true;
1447 ++NumCastUses;
1448 }
1449
1450// If we removed all uses, nuke the cast.
1451if (CI->use_empty()) {
1452salvageDebugInfo(*CI);
1453 CI->eraseFromParent();
1454 MadeChange =true;
1455 }
1456
1457return MadeChange;
1458}
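
// Illustrative annotation (assumed IR, not part of the upstream file): given
//   def:  %c = trunc i64 %x to i32
//         br i1 %cond, label %a, label %b
//   a:    call void @use(i32 %c)
//   b:    call void @use(i32 %c)
// SinkCast clones the trunc into %a and %b (one clone per user block) and,
// once the original cast has no remaining uses, erases it.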
1459
1460/// If the specified cast instruction is a noop copy (e.g. it's casting from
1461/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1462/// reduce the number of virtual registers that must be created and coalesced.
1463///
1464/// Return true if any changes are made.
1465staticboolOptimizeNoopCopyExpression(CastInst *CI,constTargetLowering &TLI,
1466constDataLayout &DL) {
1467// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1468// than sinking only nop casts, but is helpful on some platforms.
1469if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1470if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1471 ASC->getDestAddressSpace()))
1472returnfalse;
1473 }
1474
1475// If this is a noop copy,
1476EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1477EVT DstVT = TLI.getValueType(DL, CI->getType());
1478
1479// This is an fp<->int conversion?
1480if (SrcVT.isInteger() != DstVT.isInteger())
1481returnfalse;
1482
1483// If this is an extension, it will be a zero or sign extension, which
1484// isn't a noop.
1485if (SrcVT.bitsLT(DstVT))
1486returnfalse;
1487
1488// If these values will be promoted, find out what they will be promoted
1489// to. This helps us consider truncates on PPC as noop copies when they
1490// are.
1491if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1492TargetLowering::TypePromoteInteger)
1493 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1494if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1495TargetLowering::TypePromoteInteger)
1496 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1497
1498// If, after promotion, these are the same types, this is a noop copy.
1499if (SrcVT != DstVT)
1500returnfalse;
1501
1502returnSinkCast(CI);
1503}
1504
// Match a simple increment by a constant operation. Note that if a sub is
// matched, the step is negated (as if the step had been canonicalized to
// an add, even though we leave the instruction alone).
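// For example (illustrative): '%iv.next = add i32 %iv, 4' matches with
// Step = 4, while '%iv.next = sub i32 %iv, 4' matches with Step = -4.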
1508staticboolmatchIncrement(constInstruction *IVInc,Instruction *&LHS,
1509Constant *&Step) {
1510if (match(IVInc,m_Add(m_Instruction(LHS),m_Constant(Step))) ||
1511match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1512m_Instruction(LHS),m_Constant(Step)))))
1513returntrue;
1514if (match(IVInc,m_Sub(m_Instruction(LHS),m_Constant(Step))) ||
1515match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1516m_Instruction(LHS),m_Constant(Step))))) {
1517 Step =ConstantExpr::getNeg(Step);
1518returntrue;
1519 }
1520returnfalse;
1521}
1522
/// If the given \p PN is an induction variable whose value IVInc comes from
/// the backedge, and on each iteration it gets increased by Step, return the
/// pair <IVInc, Step>. Otherwise, return std::nullopt.
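/// For example (illustrative), for a loop whose header PHI %iv starts at 0 and
/// is updated by '%iv.next = add i32 %iv, 2' on the latch, this returns
/// <%iv.next, 2>.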
1526static std::optional<std::pair<Instruction *, Constant *>>
1527getIVIncrement(constPHINode *PN,constLoopInfo *LI) {
1528constLoop *L = LI->getLoopFor(PN->getParent());
1529if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1530return std::nullopt;
1531auto *IVInc =
1532 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1533if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1534return std::nullopt;
1535Instruction *LHS =nullptr;
1536Constant *Step =nullptr;
1537if (matchIncrement(IVInc,LHS, Step) &&LHS == PN)
1538return std::make_pair(IVInc, Step);
1539return std::nullopt;
1540}
1541
1542staticboolisIVIncrement(constValue *V,constLoopInfo *LI) {
1543auto *I = dyn_cast<Instruction>(V);
1544if (!I)
1545returnfalse;
1546Instruction *LHS =nullptr;
1547Constant *Step =nullptr;
1548if (!matchIncrement(I,LHS, Step))
1549returnfalse;
1550if (auto *PN = dyn_cast<PHINode>(LHS))
1551if (auto IVInc =getIVIncrement(PN, LI))
1552return IVInc->first ==I;
1553returnfalse;
1554}
1555
1556bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1557Value *Arg0,Value *Arg1,
1558CmpInst *Cmp,
1559Intrinsic::ID IID) {
1560auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1561if (!isIVIncrement(BO, LI))
1562returnfalse;
1563constLoop *L = LI->getLoopFor(BO->getParent());
1564assert(L &&"L should not be null after isIVIncrement()");
// Do not risk moving the increment into a child loop.
1566if (LI->getLoopFor(Cmp->getParent()) != L)
1567returnfalse;
1568
1569// Finally, we need to ensure that the insert point will dominate all
1570// existing uses of the increment.
1571
1572auto &DT = getDT(*BO->getParent()->getParent());
1573if (DT.dominates(Cmp->getParent(), BO->getParent()))
1574// If we're moving up the dom tree, all uses are trivially dominated.
1575// (This is the common case for code produced by LSR.)
1576returntrue;
1577
1578// Otherwise, special case the single use in the phi recurrence.
1579return BO->hasOneUse() && DT.dominates(Cmp->getParent(),L->getLoopLatch());
1580 };
1581if (BO->getParent() !=Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1582// We used to use a dominator tree here to allow multi-block optimization.
1583// But that was problematic because:
1584// 1. It could cause a perf regression by hoisting the math op into the
1585// critical path.
1586// 2. It could cause a perf regression by creating a value that was live
1587// across multiple blocks and increasing register pressure.
1588// 3. Use of a dominator tree could cause large compile-time regression.
1589// This is because we recompute the DT on every change in the main CGP
1590// run-loop. The recomputing is probably unnecessary in many cases, so if
1591// that was fixed, using a DT here would be ok.
1592//
// There is one important case we still want to handle: when BO is
// the IV increment. Important properties that make it profitable:
// - We can speculate the IV increment anywhere in the loop (as long as the
//   indvar Phi is its only user);
// - Upon computing Cmp, we effectively compute something equivalent to the
//   IV increment (even though it looks different in the IR), so moving it
//   up to the cmp point does not really increase register pressure.
1600returnfalse;
1601 }
1602
1603// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1604if (BO->getOpcode() == Instruction::Add &&
1605 IID == Intrinsic::usub_with_overflow) {
1606assert(isa<Constant>(Arg1) &&"Unexpected input for usubo");
1607 Arg1 =ConstantExpr::getNeg(cast<Constant>(Arg1));
1608 }
1609
1610// Insert at the first instruction of the pair.
1611Instruction *InsertPt =nullptr;
1612for (Instruction &Iter : *Cmp->getParent()) {
1613// If BO is an XOR, it is not guaranteed that it comes after both inputs to
1614// the overflow intrinsic are defined.
1615if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1616 InsertPt = &Iter;
1617break;
1618 }
1619 }
1620assert(InsertPt !=nullptr &&"Parent block did not contain cmp or binop");
1621
1622IRBuilder<> Builder(InsertPt);
1623Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1624if (BO->getOpcode() != Instruction::Xor) {
1625Value *Math = Builder.CreateExtractValue(MathOV, 0,"math");
1626replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1627 }else
1628assert(BO->hasOneUse() &&
1629"Patterns with XOr should use the BO only in the compare");
1630Value *OV = Builder.CreateExtractValue(MathOV, 1,"ov");
1631replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1632Cmp->eraseFromParent();
1633 BO->eraseFromParent();
1634returntrue;
1635}
1636
1637/// Match special-case patterns that check for unsigned add overflow.
1638staticboolmatchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1639BinaryOperator *&Add) {
1640// Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1641// Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1642Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1643
1644// We are not expecting non-canonical/degenerate code. Just bail out.
1645if (isa<Constant>(A))
1646returnfalse;
1647
1648ICmpInst::Predicate Pred = Cmp->getPredicate();
1649if (Pred ==ICmpInst::ICMP_EQ &&match(B,m_AllOnes()))
1650B = ConstantInt::get(B->getType(), 1);
1651elseif (Pred ==ICmpInst::ICMP_NE &&match(B,m_ZeroInt()))
1652B =Constant::getAllOnesValue(B->getType());
1653else
1654returnfalse;
1655
1656// Check the users of the variable operand of the compare looking for an add
1657// with the adjusted constant.
1658for (User *U :A->users()) {
1659if (match(U,m_Add(m_Specific(A),m_Specific(B)))) {
1660Add = cast<BinaryOperator>(U);
1661returntrue;
1662 }
1663 }
1664returnfalse;
1665}
1666
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
/// intrinsic. Return true if any changes were made.
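/// For example (illustrative IR; value names are placeholders):
///   %add = add i32 %a, %b
///   %cmp = icmp ult i32 %add, %a
/// becomes
///   %m   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %add = extractvalue { i32, i1 } %m, 0
///   %cmp = extractvalue { i32, i1 } %m, 1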
1669bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1670 ModifyDT &ModifiedDT) {
1671bool EdgeCase =false;
1672Value *A, *B;
1673BinaryOperator *Add;
1674if (!match(Cmp,m_UAddWithOverflow(m_Value(A),m_Value(B),m_BinOp(Add)))) {
1675if (!matchUAddWithOverflowConstantEdgeCases(Cmp,Add))
1676returnfalse;
1677// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1678A =Add->getOperand(0);
1679B =Add->getOperand(1);
1680 EdgeCase =true;
1681 }
1682
1683if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1684 TLI->getValueType(*DL,Add->getType()),
1685Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1686returnfalse;
1687
1688// We don't want to move around uses of condition values this late, so we
1689// check if it is legal to create the call to the intrinsic in the basic
1690// block containing the icmp.
1691if (Add->getParent() !=Cmp->getParent() && !Add->hasOneUse())
1692returnfalse;
1693
1694if (!replaceMathCmpWithIntrinsic(Add,A,B, Cmp,
1695 Intrinsic::uadd_with_overflow))
1696returnfalse;
1697
1698// Reset callers - do not crash by iterating over a dead instruction.
1699 ModifiedDT = ModifyDT::ModifyInstDT;
1700returntrue;
1701}
1702
1703bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1704 ModifyDT &ModifiedDT) {
1705// We are not expecting non-canonical/degenerate code. Just bail out.
1706Value *A =Cmp->getOperand(0), *B =Cmp->getOperand(1);
1707if (isa<Constant>(A) && isa<Constant>(B))
1708returnfalse;
1709
1710// Convert (A u> B) to (A u< B) to simplify pattern matching.
1711ICmpInst::Predicate Pred =Cmp->getPredicate();
1712if (Pred ==ICmpInst::ICMP_UGT) {
1713std::swap(A,B);
1714 Pred =ICmpInst::ICMP_ULT;
1715 }
1716// Convert special-case: (A == 0) is the same as (A u< 1).
1717if (Pred ==ICmpInst::ICMP_EQ &&match(B,m_ZeroInt())) {
1718B = ConstantInt::get(B->getType(), 1);
1719 Pred =ICmpInst::ICMP_ULT;
1720 }
1721// Convert special-case: (A != 0) is the same as (0 u< A).
1722if (Pred ==ICmpInst::ICMP_NE &&match(B,m_ZeroInt())) {
1723std::swap(A,B);
1724 Pred =ICmpInst::ICMP_ULT;
1725 }
1726if (Pred !=ICmpInst::ICMP_ULT)
1727returnfalse;
1728
1729// Walk the users of a variable operand of a compare looking for a subtract or
1730// add with that same operand. Also match the 2nd operand of the compare to
1731// the add/sub, but that may be a negated constant operand of an add.
1732Value *CmpVariableOperand = isa<Constant>(A) ?B :A;
1733BinaryOperator *Sub =nullptr;
1734for (User *U : CmpVariableOperand->users()) {
1735// A - B, A u< B --> usubo(A, B)
1736if (match(U,m_Sub(m_Specific(A),m_Specific(B)))) {
1737 Sub = cast<BinaryOperator>(U);
1738break;
1739 }
1740
1741// A + (-C), A u< C (canonicalized form of (sub A, C))
1742constAPInt *CmpC, *AddC;
1743if (match(U,m_Add(m_Specific(A),m_APInt(AddC))) &&
1744match(B,m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1745 Sub = cast<BinaryOperator>(U);
1746break;
1747 }
1748 }
1749if (!Sub)
1750returnfalse;
1751
1752if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1753 TLI->getValueType(*DL, Sub->getType()),
1754 Sub->hasNUsesOrMore(1)))
1755returnfalse;
1756
1757if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1758 Cmp, Intrinsic::usub_with_overflow))
1759returnfalse;
1760
1761// Reset callers - do not crash by iterating over a dead instruction.
1762 ModifiedDT = ModifyDT::ModifyInstDT;
1763returntrue;
1764}
1765
/// Sink the given CmpInst into user blocks to reduce the number of virtual
/// registers that must be created and coalesced. This is a clear win except on
/// targets with multiple condition code registers (PowerPC), where it might
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
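/// For example (illustrative IR; value names are placeholders):
///   bb0:
///     %c = icmp eq i32 %x, 0
///     br label %bb1
///   bb1:
///     %r = select i1 %c, i32 %a, i32 %b
/// becomes
///   bb1:
///     %c1 = icmp eq i32 %x, 0
///     %r = select i1 %c1, i32 %a, i32 %b
/// so isel can fold the compare into the conditional instruction instead of
/// materializing an i1 value across blocks.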
1772staticboolsinkCmpExpression(CmpInst *Cmp,constTargetLowering &TLI) {
1773if (TLI.hasMultipleConditionRegisters())
1774returnfalse;
1775
1776// Avoid sinking soft-FP comparisons, since this can move them into a loop.
1777if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1778returnfalse;
1779
1780// Only insert a cmp in each block once.
1781DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1782
1783bool MadeChange =false;
1784for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1785 UI != E;) {
1786Use &TheUse = UI.getUse();
1787Instruction *User = cast<Instruction>(*UI);
1788
1789// Preincrement use iterator so we don't invalidate it.
1790 ++UI;
1791
1792// Don't bother for PHI nodes.
1793if (isa<PHINode>(User))
1794continue;
1795
1796// Figure out which BB this cmp is used in.
1797BasicBlock *UserBB =User->getParent();
1798BasicBlock *DefBB = Cmp->getParent();
1799
1800// If this user is in the same block as the cmp, don't change the cmp.
1801if (UserBB == DefBB)
1802continue;
1803
1804// If we have already inserted a cmp into this block, use it.
1805CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1806
1807if (!InsertedCmp) {
1808BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1809assert(InsertPt != UserBB->end());
1810 InsertedCmp =CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1811 Cmp->getOperand(0), Cmp->getOperand(1),"");
1812 InsertedCmp->insertBefore(*UserBB, InsertPt);
1813// Propagate the debug info.
1814 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1815 }
1816
1817// Replace a use of the cmp with a use of the new cmp.
1818 TheUse = InsertedCmp;
1819 MadeChange =true;
1820 ++NumCmpUses;
1821 }
1822
1823// If we removed all uses, nuke the cmp.
1824if (Cmp->use_empty()) {
1825 Cmp->eraseFromParent();
1826 MadeChange =true;
1827 }
1828
1829return MadeChange;
1830}
1831
1832/// For pattern like:
1833///
1834/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1835/// ...
1836/// DomBB:
1837/// ...
1838/// br DomCond, TrueBB, CmpBB
1839/// CmpBB: (with DomBB being the single predecessor)
1840/// ...
1841/// Cmp = icmp eq CmpOp0, CmpOp1
1842/// ...
1843///
/// This would use two comparisons on targets where the lowering of icmp
/// sgt/slt differs from the lowering of icmp eq (e.g. PowerPC). This function
/// tries to convert 'Cmp = icmp eq CmpOp0, CmpOp1' into
/// 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'. After that, DomCond and Cmp can use
/// the same comparison, saving one comparison.
1849///
1850/// Return true if any changes are made.
1851staticboolfoldICmpWithDominatingICmp(CmpInst *Cmp,
1852constTargetLowering &TLI) {
1853if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1854returnfalse;
1855
1856ICmpInst::Predicate Pred = Cmp->getPredicate();
1857if (Pred !=ICmpInst::ICMP_EQ)
1858returnfalse;
1859
1860// If icmp eq has users other than BranchInst and SelectInst, converting it to
1861// icmp slt/sgt would introduce more redundant LLVM IR.
1862for (User *U : Cmp->users()) {
1863if (isa<BranchInst>(U))
1864continue;
1865if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1866continue;
1867returnfalse;
1868 }
1869
1870// This is a cheap/incomplete check for dominance - just match a single
1871// predecessor with a conditional branch.
1872BasicBlock *CmpBB = Cmp->getParent();
1873BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1874if (!DomBB)
1875returnfalse;
1876
1877// We want to ensure that the only way control gets to the comparison of
1878// interest is that a less/greater than comparison on the same operands is
1879// false.
1880Value *DomCond;
1881BasicBlock *TrueBB, *FalseBB;
1882if (!match(DomBB->getTerminator(),m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1883returnfalse;
1884if (CmpBB != FalseBB)
1885returnfalse;
1886
1887Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1888CmpPredicate DomPred;
1889if (!match(DomCond,m_ICmp(DomPred,m_Specific(CmpOp0),m_Specific(CmpOp1))))
1890returnfalse;
1891if (DomPred !=ICmpInst::ICMP_SGT && DomPred !=ICmpInst::ICMP_SLT)
1892returnfalse;
1893
1894// Convert the equality comparison to the opposite of the dominating
1895// comparison and swap the direction for all branch/select users.
1896// We have conceptually converted:
1897// Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1898// to
1899// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1900// And similarly for branches.
1901for (User *U : Cmp->users()) {
1902if (auto *BI = dyn_cast<BranchInst>(U)) {
1903assert(BI->isConditional() &&"Must be conditional");
1904 BI->swapSuccessors();
1905continue;
1906 }
1907if (auto *SI = dyn_cast<SelectInst>(U)) {
1908// Swap operands
1909 SI->swapValues();
1910 SI->swapProfMetadata();
1911continue;
1912 }
1913llvm_unreachable("Must be a branch or a select");
1914 }
1915 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1916returntrue;
1917}
1918
/// Many architectures use the same instruction for both subtract and cmp. Try
/// to swap cmp operands to match subtract operations to allow for CSE.
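/// For example (illustrative): given '%s = sub i32 %b, %a' elsewhere in the
/// function, '%c = icmp ult i32 %a, %b' is rewritten as
/// '%c = icmp ugt i32 %b, %a' so the compare and the subtract share the same
/// operand order and the target can reuse one subtract-and-set-flags result.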
1921staticboolswapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1922Value *Op0 = Cmp->getOperand(0);
1923Value *Op1 = Cmp->getOperand(1);
1924if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1925 isa<Constant>(Op1) || Op0 == Op1)
1926returnfalse;
1927
1928// If a subtract already has the same operands as a compare, swapping would be
1929// bad. If a subtract has the same operands as a compare but in reverse order,
1930// then swapping is good.
1931int GoodToSwap = 0;
1932unsigned NumInspected = 0;
1933for (constUser *U : Op0->users()) {
1934// Avoid walking many users.
1935if (++NumInspected > 128)
1936returnfalse;
1937if (match(U,m_Sub(m_Specific(Op1),m_Specific(Op0))))
1938 GoodToSwap++;
1939elseif (match(U,m_Sub(m_Specific(Op0),m_Specific(Op1))))
1940 GoodToSwap--;
1941 }
1942
1943if (GoodToSwap > 0) {
1944 Cmp->swapOperands();
1945returntrue;
1946 }
1947returnfalse;
1948}
1949
1950staticboolfoldFCmpToFPClassTest(CmpInst *Cmp,constTargetLowering &TLI,
1951constDataLayout &DL) {
1952FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1953if (!FCmp)
1954returnfalse;
1955
1956// Don't fold if the target offers free fabs and the predicate is legal.
1957EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1958if (TLI.isFAbsFree(VT) &&
1959 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
1960 VT.getSimpleVT()))
1961returnfalse;
1962
1963// Reverse the canonicalization if it is a FP class test
1964auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1965return ClassTest ==fcInf || ClassTest == (fcInf |fcNan);
1966 };
1967auto [ClassVal, ClassTest] =
1968fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1969 FCmp->getOperand(0), FCmp->getOperand(1));
1970if (!ClassVal)
1971returnfalse;
1972
1973if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1974returnfalse;
1975
1976IRBuilder<> Builder(Cmp);
1977Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1978 Cmp->replaceAllUsesWith(IsFPClass);
1979RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1980returntrue;
1981}
1982
1983staticboolisRemOfLoopIncrementWithLoopInvariant(
1984Instruction *Rem,constLoopInfo *LI,Value *&RemAmtOut,Value *&AddInstOut,
1985Value *&AddOffsetOut,PHINode *&LoopIncrPNOut) {
1986Value *Incr, *RemAmt;
1987// NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1988if (!match(Rem,m_URem(m_Value(Incr),m_Value(RemAmt))))
1989returnfalse;
1990
1991Value *AddInst, *AddOffset;
1992// Find out loop increment PHI.
1993auto *PN = dyn_cast<PHINode>(Incr);
1994if (PN !=nullptr) {
1995 AddInst =nullptr;
1996 AddOffset =nullptr;
1997 }else {
1998// Search through a NUW add on top of the loop increment.
1999Value *V0, *V1;
2000if (!match(Incr,m_NUWAdd(m_Value(V0),m_Value(V1))))
2001returnfalse;
2002
2003 AddInst = Incr;
2004 PN = dyn_cast<PHINode>(V0);
2005if (PN !=nullptr) {
2006 AddOffset = V1;
2007 }else {
2008 PN = dyn_cast<PHINode>(V1);
2009 AddOffset = V0;
2010 }
2011 }
2012
2013if (!PN)
2014returnfalse;
2015
// This isn't strictly necessary; what we really need is one increment and
// any number of initial values, all being the same.
2018if (PN->getNumIncomingValues() != 2)
2019returnfalse;
2020
2021// Only trivially analyzable loops.
2022Loop *L = LI->getLoopFor(PN->getParent());
2023if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2024returnfalse;
2025
// Require that the remainder is in the loop.
2027if (!L->contains(Rem))
2028returnfalse;
2029
// Only works if the remainder amount is loop-invariant.
2031if (!L->isLoopInvariant(RemAmt))
2032returnfalse;
2033
2034// Is the PHI a loop increment?
2035auto LoopIncrInfo =getIVIncrement(PN, LI);
2036if (!LoopIncrInfo)
2037returnfalse;
2038
2039// We need remainder_amount % increment_amount to be zero. Increment of one
2040// satisfies that without any special logic and is overwhelmingly the common
2041// case.
2042if (!match(LoopIncrInfo->second,m_One()))
2043returnfalse;
2044
2045// Need the increment to not overflow.
2046if (!match(LoopIncrInfo->first,m_c_NUWAdd(m_Specific(PN),m_Value())))
2047returnfalse;
2048
2049// Set output variables.
2050 RemAmtOut = RemAmt;
2051 LoopIncrPNOut = PN;
2052 AddInstOut = AddInst;
2053 AddOffsetOut = AddOffset;
2054
2055returntrue;
2056}
2057
2058// Try to transform:
2059//
2060// for(i = Start; i < End; ++i)
2061// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2062//
2063// ->
2064//
2065// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2066// for(i = Start; i < End; ++i, ++rem)
2067// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2068staticboolfoldURemOfLoopIncrement(Instruction *Rem,constDataLayout *DL,
2069constLoopInfo *LI,
2070SmallSet<BasicBlock *, 32> &FreshBBs,
2071bool IsHuge) {
2072Value *AddOffset, *RemAmt, *AddInst;
2073PHINode *LoopIncrPN;
2074if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2075 AddOffset, LoopIncrPN))
2076returnfalse;
2077
// Only handle a non-constant remainder amount, as the extra IV is probably
// not profitable in the constant case.
//
// Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`.
// If we can rule out register pressure and ensure this `urem` is executed
// each iteration, it's probably profitable to handle the const case as well.
//
// Potential TODO(2): Should we have a check for how "nested" this remainder
// operation is? The new code runs every iteration, so if the remainder is
// guarded behind unlikely conditions this might not be worth it.
2088if (match(RemAmt,m_ImmConstant()))
2089returnfalse;
2090
2091Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2092Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
// If we have an add, create the initial value for the remainder.
// The logic here is:
//    (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2096//
2097// Only proceed if the expression simplifies (otherwise we can't fully
2098// optimize out the urem).
2099if (AddInst) {
2100assert(AddOffset &&"We found an add but missing values");
2101// Without dom-condition/assumption cache we aren't likely to get much out
2102// of a context instruction.
2103 Start =simplifyAddInst(Start, AddOffset,
2104match(AddInst,m_NSWAdd(m_Value(),m_Value())),
2105/*IsNUW=*/true, *DL);
2106if (!Start)
2107returnfalse;
2108 }
2109
2110// If we can't fully optimize out the `rem`, skip this transform.
2111 Start =simplifyURemInst(Start, RemAmt, *DL);
2112if (!Start)
2113returnfalse;
2114
2115// Create new remainder with induction variable.
2116Type *Ty = Rem->getType();
2117IRBuilder<> Builder(Rem->getContext());
2118
2119 Builder.SetInsertPoint(LoopIncrPN);
2120PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2121
2122 Builder.SetInsertPoint(cast<Instruction>(
2123 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2124// `(add (urem x, y), 1)` is always nuw.
2125Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2126Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2127Value *RemSel =
2128 Builder.CreateSelect(RemCmp,Constant::getNullValue(Ty), RemAdd);
2129
2130 NewRem->addIncoming(Start, L->getLoopPreheader());
2131 NewRem->addIncoming(RemSel, L->getLoopLatch());
2132
2133// Insert all touched BBs.
2134 FreshBBs.insert(LoopIncrPN->getParent());
2135 FreshBBs.insert(L->getLoopLatch());
2136 FreshBBs.insert(Rem->getParent());
2137if (AddInst)
2138 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2139replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2140 Rem->eraseFromParent();
2141if (AddInst && AddInst->use_empty())
2142 cast<Instruction>(AddInst)->eraseFromParent();
2143returntrue;
2144}
2145
2146bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2147if (foldURemOfLoopIncrement(Rem,DL, LI, FreshBBs, IsHugeFunc))
2148returntrue;
2149returnfalse;
2150}
2151
/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
/// This function converts `ctpop(X) == 1` into `ctpop(X) u< 2` and
/// `ctpop(X) != 1` into `ctpop(X) u> 1` if the result cannot be zero.
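/// For example (illustrative), when %x is known non-zero:
///   %p = call i32 @llvm.ctpop.i32(i32 %x)
///   %c = icmp eq i32 %p, 1
/// becomes
///   %c = icmp ult i32 %p, 2
/// and the '!= 1' form becomes 'icmp ugt i32 %p, 1'.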
2155staticbooladjustIsPower2Test(CmpInst *Cmp,constTargetLowering &TLI,
2156constTargetTransformInfo &TTI,
2157constDataLayout &DL) {
2158CmpPredicate Pred;
2159if (!match(Cmp,m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(),m_One())))
2160returnfalse;
2161if (!ICmpInst::isEquality(Pred))
2162returnfalse;
2163auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2164
2165if (isKnownNonZero(II,DL)) {
2166if (Pred ==ICmpInst::ICMP_EQ) {
2167 Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
2168 Cmp->setPredicate(ICmpInst::ICMP_ULT);
2169 }else {
2170 Cmp->setPredicate(ICmpInst::ICMP_UGT);
2171 }
2172returntrue;
2173 }
2174returnfalse;
2175}
2176
2177bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2178if (sinkCmpExpression(Cmp, *TLI))
2179returntrue;
2180
2181if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2182returntrue;
2183
2184if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2185returntrue;
2186
2187if (foldICmpWithDominatingICmp(Cmp, *TLI))
2188returntrue;
2189
2190if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2191returntrue;
2192
2193if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2194returntrue;
2195
2196if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
2197returntrue;
2198
2199returnfalse;
2200}
2201
/// Duplicate and sink the given 'and' instruction into user blocks where it is
/// used in a compare to allow isel to generate better code for targets where
/// this operation can be combined.
///
/// Return true if any changes are made.
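/// For example (illustrative IR; value names are placeholders):
///   bb0:
///     %m = and i64 %x, 255
///     br label %bb1
///   bb1:
///     %c = icmp eq i64 %m, 0
/// becomes
///   bb1:
///     %m1 = and i64 %x, 255
///     %c = icmp eq i64 %m1, 0
/// so isel can combine the mask and the compare (e.g. into a test-under-mask
/// style instruction) in the block that uses them.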
2207staticboolsinkAndCmp0Expression(Instruction *AndI,constTargetLowering &TLI,
2208 SetOfInstrs &InsertedInsts) {
2209// Double-check that we're not trying to optimize an instruction that was
2210// already optimized by some other part of this pass.
2211assert(!InsertedInsts.count(AndI) &&
2212"Attempting to optimize already optimized and instruction");
2213 (void)InsertedInsts;
2214
2215// Nothing to do for single use in same basic block.
2216if (AndI->hasOneUse() &&
2217 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2218returnfalse;
2219
2220// Try to avoid cases where sinking/duplicating is likely to increase register
2221// pressure.
2222if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2223 !isa<ConstantInt>(AndI->getOperand(1)) &&
2224 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2225returnfalse;
2226
2227for (auto *U : AndI->users()) {
2228Instruction *User = cast<Instruction>(U);
2229
2230// Only sink 'and' feeding icmp with 0.
2231if (!isa<ICmpInst>(User))
2232returnfalse;
2233
2234auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2235if (!CmpC || !CmpC->isZero())
2236returnfalse;
2237 }
2238
2239if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2240returnfalse;
2241
2242LLVM_DEBUG(dbgs() <<"found 'and' feeding only icmp 0;\n");
2243LLVM_DEBUG(AndI->getParent()->dump());
2244
2245// Push the 'and' into the same block as the icmp 0. There should only be
2246// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2247// others, so we don't need to keep track of which BBs we insert into.
2248for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2249 UI != E;) {
2250Use &TheUse = UI.getUse();
2251Instruction *User = cast<Instruction>(*UI);
2252
2253// Preincrement use iterator so we don't invalidate it.
2254 ++UI;
2255
2256LLVM_DEBUG(dbgs() <<"sinking 'and' use: " << *User <<"\n");
2257
2258// Keep the 'and' in the same place if the use is already in the same block.
2259Instruction *InsertPt =
2260User->getParent() == AndI->getParent() ? AndI :User;
2261Instruction *InsertedAnd =BinaryOperator::Create(
2262 Instruction::And, AndI->getOperand(0), AndI->getOperand(1),"",
2263 InsertPt->getIterator());
2264// Propagate the debug info.
2265 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2266
2267// Replace a use of the 'and' with a use of the new 'and'.
2268 TheUse = InsertedAnd;
2269 ++NumAndUses;
2270LLVM_DEBUG(User->getParent()->dump());
2271 }
2272
2273// We removed all uses, nuke the and.
2274 AndI->eraseFromParent();
2275returntrue;
2276}
2277
/// Check if the candidate use could be combined with a shift instruction.
/// Accepted candidates are:
/// 1. A truncate instruction.
/// 2. An 'and' instruction whose immediate is a mask of the low bits:
///    imm & (imm+1) == 0
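///
/// For example (illustrative): 0xFF and 0x7FFF satisfy imm & (imm+1) == 0 and
/// are accepted, while 0xF0 is not a low-bit mask and is rejected.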
2283staticboolisExtractBitsCandidateUse(Instruction *User) {
2284if (!isa<TruncInst>(User)) {
2285if (User->getOpcode() != Instruction::And ||
2286 !isa<ConstantInt>(User->getOperand(1)))
2287returnfalse;
2288
2289constAPInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2290
2291if ((Cimm & (Cimm + 1)).getBoolValue())
2292returnfalse;
2293 }
2294returntrue;
2295}
2296
2297/// Sink both shift and truncate instruction to the use of truncate's BB.
2298staticbool
2299SinkShiftAndTruncate(BinaryOperator *ShiftI,Instruction *User,ConstantInt *CI,
2300DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2301constTargetLowering &TLI,constDataLayout &DL) {
2302BasicBlock *UserBB =User->getParent();
2303DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2304auto *TruncI = cast<TruncInst>(User);
2305bool MadeChange =false;
2306
2307for (Value::user_iterator TruncUI = TruncI->user_begin(),
2308 TruncE = TruncI->user_end();
2309 TruncUI != TruncE;) {
2310
2311Use &TruncTheUse = TruncUI.getUse();
2312Instruction *TruncUser = cast<Instruction>(*TruncUI);
2313// Preincrement use iterator so we don't invalidate it.
2314
2315 ++TruncUI;
2316
2317int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2318if (!ISDOpcode)
2319continue;
2320
2321// If the use is actually a legal node, there will not be an
2322// implicit truncate.
2323// FIXME: always querying the result type is just an
2324// approximation; some nodes' legality is determined by the
2325// operand or other means. There's no good way to find out though.
2326if (TLI.isOperationLegalOrCustom(
2327 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(),true)))
2328continue;
2329
2330// Don't bother for PHI nodes.
2331if (isa<PHINode>(TruncUser))
2332continue;
2333
2334BasicBlock *TruncUserBB = TruncUser->getParent();
2335
2336if (UserBB == TruncUserBB)
2337continue;
2338
2339BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2340CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2341
2342if (!InsertedShift && !InsertedTrunc) {
2343BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2344assert(InsertPt != TruncUserBB->end());
2345// Sink the shift
2346if (ShiftI->getOpcode() == Instruction::AShr)
2347 InsertedShift =
2348 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,"");
2349else
2350 InsertedShift =
2351 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,"");
2352 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2353 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2354
2355// Sink the trunc
2356BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2357 TruncInsertPt++;
2358// It will go ahead of any debug-info.
2359 TruncInsertPt.setHeadBit(true);
2360assert(TruncInsertPt != TruncUserBB->end());
2361
2362 InsertedTrunc =CastInst::Create(TruncI->getOpcode(), InsertedShift,
2363 TruncI->getType(),"");
2364 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2365 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2366
2367 MadeChange =true;
2368
2369 TruncTheUse = InsertedTrunc;
2370 }
2371 }
2372return MadeChange;
2373}
2374
/// Sink the shift *right* instruction into user blocks if the uses could
/// potentially be combined with this shift instruction to generate a
/// BitExtract instruction. It is only applied if the architecture supports a
/// BitExtract instruction. Here is an example:
2379/// BB1:
2380/// %x.extract.shift = lshr i64 %arg1, 32
2381/// BB2:
2382/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2383/// ==>
2384///
2385/// BB2:
2386/// %x.extract.shift.1 = lshr i64 %arg1, 32
2387/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2388///
2389/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2390/// instruction.
2391/// Return true if any changes are made.
2392staticboolOptimizeExtractBits(BinaryOperator *ShiftI,ConstantInt *CI,
2393constTargetLowering &TLI,
2394constDataLayout &DL) {
2395BasicBlock *DefBB = ShiftI->getParent();
2396
2397 /// Only insert instructions in each block once.
2398DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2399
2400bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2401
2402bool MadeChange =false;
2403for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2404 UI != E;) {
2405Use &TheUse = UI.getUse();
2406Instruction *User = cast<Instruction>(*UI);
2407// Preincrement use iterator so we don't invalidate it.
2408 ++UI;
2409
2410// Don't bother for PHI nodes.
2411if (isa<PHINode>(User))
2412continue;
2413
2414if (!isExtractBitsCandidateUse(User))
2415continue;
2416
2417BasicBlock *UserBB =User->getParent();
2418
2419if (UserBB == DefBB) {
// If the shift and the truncate are in the same BB, the use of the
// truncate (TruncUse) may still introduce another truncate if it is not
// legal. In this case, we would like to sink both the shift and the
// truncate into the BB of TruncUse.
2424// for example:
2425// BB1:
2426// i64 shift.result = lshr i64 opnd, imm
2427// trunc.result = trunc shift.result to i16
2428//
2429// BB2:
2430// ----> We will have an implicit truncate here if the architecture does
2431// not have i16 compare.
2432// cmp i16 trunc.result, opnd2
2433//
2434if (isa<TruncInst>(User) &&
2435 shiftIsLegal
2436// If the type of the truncate is legal, no truncate will be
2437// introduced in other basic blocks.
2438 && (!TLI.isTypeLegal(TLI.getValueType(DL,User->getType()))))
2439 MadeChange =
2440SinkShiftAndTruncate(ShiftI,User, CI, InsertedShifts, TLI,DL);
2441
2442continue;
2443 }
2444// If we have already inserted a shift into this block, use it.
2445BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2446
2447if (!InsertedShift) {
2448BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2449assert(InsertPt != UserBB->end());
2450
2451if (ShiftI->getOpcode() == Instruction::AShr)
2452 InsertedShift =
2453 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,"");
2454else
2455 InsertedShift =
2456 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,"");
2457 InsertedShift->insertBefore(*UserBB, InsertPt);
2458 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2459
2460 MadeChange =true;
2461 }
2462
2463// Replace a use of the shift with a use of the new shift.
2464 TheUse = InsertedShift;
2465 }
2466
2467// If we removed all uses, or there are none, nuke the shift.
2468if (ShiftI->use_empty()) {
2469salvageDebugInfo(*ShiftI);
2470 ShiftI->eraseFromParent();
2471 MadeChange =true;
2472 }
2473
2474return MadeChange;
2475}
2476
2477/// If counting leading or trailing zeros is an expensive operation and a zero
2478/// input is defined, add a check for zero to avoid calling the intrinsic.
2479///
2480/// We want to transform:
2481/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2482///
2483/// into:
2484/// entry:
2485/// %cmpz = icmp eq i64 %A, 0
2486/// br i1 %cmpz, label %cond.end, label %cond.false
2487/// cond.false:
2488/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2489/// br label %cond.end
2490/// cond.end:
2491/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2492///
2493/// If the transform is performed, return true and set ModifiedDT to true.
2494staticbooldespeculateCountZeros(IntrinsicInst *CountZeros,
2495LoopInfo &LI,
2496constTargetLowering *TLI,
2497constDataLayout *DL, ModifyDT &ModifiedDT,
2498SmallSet<BasicBlock *, 32> &FreshBBs,
2499bool IsHugeFunc) {
2500// If a zero input is undefined, it doesn't make sense to despeculate that.
2501if (match(CountZeros->getOperand(1),m_One()))
2502returnfalse;
2503
2504// If it's cheap to speculate, there's nothing to do.
2505Type *Ty = CountZeros->getType();
2506auto IntrinsicID = CountZeros->getIntrinsicID();
2507if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2508 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2509returnfalse;
2510
2511// Only handle legal scalar cases. Anything else requires too much work.
2512unsigned SizeInBits = Ty->getScalarSizeInBits();
2513if (Ty->isVectorTy() || SizeInBits >DL->getLargestLegalIntTypeSizeInBits())
2514returnfalse;
2515
2516// Bail if the value is never zero.
2517Use &Op = CountZeros->getOperandUse(0);
2518if (isKnownNonZero(Op, *DL))
2519returnfalse;
2520
2521// The intrinsic will be sunk behind a compare against zero and branch.
2522BasicBlock *StartBlock = CountZeros->getParent();
2523BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros,"cond.false");
2524if (IsHugeFunc)
2525 FreshBBs.insert(CallBlock);
2526
2527// Create another block after the count zero intrinsic. A PHI will be added
2528// in this block to select the result of the intrinsic or the bit-width
2529// constant if the input to the intrinsic is zero.
2530BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2531// Any debug-info after CountZeros should not be included.
2532 SplitPt.setHeadBit(true);
2533BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt,"cond.end");
2534if (IsHugeFunc)
2535 FreshBBs.insert(EndBlock);
2536
2537// Update the LoopInfo. The new blocks are in the same loop as the start
2538// block.
2539if (Loop *L = LI.getLoopFor(StartBlock)) {
2540 L->addBasicBlockToLoop(CallBlock, LI);
2541 L->addBasicBlockToLoop(EndBlock, LI);
2542 }
2543
2544// Set up a builder to create a compare, conditional branch, and PHI.
2545IRBuilder<> Builder(CountZeros->getContext());
2546 Builder.SetInsertPoint(StartBlock->getTerminator());
2547 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2548
2549// Replace the unconditional branch that was created by the first split with
2550// a compare against zero and a conditional branch.
2551Value *Zero =Constant::getNullValue(Ty);
2552// Avoid introducing branch on poison. This also replaces the ctz operand.
2553if (!isGuaranteedNotToBeUndefOrPoison(Op))
2554Op = Builder.CreateFreeze(Op,Op->getName() +".fr");
2555Value *Cmp = Builder.CreateICmpEQ(Op, Zero,"cmpz");
2556 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2557 StartBlock->getTerminator()->eraseFromParent();
2558
2559// Create a PHI in the end block to select either the output of the intrinsic
2560// or the bit width of the operand.
2561 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2562PHINode *PN = Builder.CreatePHI(Ty, 2,"ctz");
2563replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2564Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2565 PN->addIncoming(BitWidth, StartBlock);
2566 PN->addIncoming(CountZeros, CallBlock);
2567
2568// We are explicitly handling the zero case, so we can set the intrinsic's
2569// undefined zero argument to 'true'. This will also prevent reprocessing the
2570// intrinsic; we only despeculate when a zero input is defined.
2571 CountZeros->setArgOperand(1, Builder.getTrue());
2572 ModifiedDT = ModifyDT::ModifyBBDT;
2573returntrue;
2574}
2575
2576bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2577BasicBlock *BB = CI->getParent();
2578
2579// Lower inline assembly if we can.
// If we found an inline asm expression, and if the target knows how to
2581// lower it to normal LLVM code, do so now.
2582if (CI->isInlineAsm()) {
2583if (TLI->ExpandInlineAsm(CI)) {
2584// Avoid invalidating the iterator.
2585 CurInstIterator = BB->begin();
2586// Avoid processing instructions out of order, which could cause
2587// reuse before a value is defined.
2588 SunkAddrs.clear();
2589returntrue;
2590 }
2591// Sink address computing for memory operands into the block.
2592if (optimizeInlineAsmInst(CI))
2593returntrue;
2594 }
2595
2596// Align the pointer arguments to this call if the target thinks it's a good
2597// idea
2598unsigned MinSize;
2599Align PrefAlign;
2600if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2601for (auto &Arg : CI->args()) {
2602// We want to align both objects whose address is used directly and
2603// objects whose address is used in casts and GEPs, though it only makes
2604// sense for GEPs if the offset is a multiple of the desired alignment and
2605// if size - offset meets the size threshold.
2606if (!Arg->getType()->isPointerTy())
2607continue;
2608APIntOffset(DL->getIndexSizeInBits(
2609 cast<PointerType>(Arg->getType())->getAddressSpace()),
2610 0);
2611Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL,Offset);
2612uint64_t Offset2 =Offset.getLimitedValue();
2613if (!isAligned(PrefAlign, Offset2))
2614continue;
2615AllocaInst *AI;
2616if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2617DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2618 AI->setAlignment(PrefAlign);
2619// Global variables can only be aligned if they are defined in this
2620// object (i.e. they are uniquely initialized in this object), and
2621// over-aligning global variables that have an explicit section is
2622// forbidden.
2623GlobalVariable *GV;
2624if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2625 GV->getPointerAlignment(*DL) < PrefAlign &&
2626DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2627 GV->setAlignment(PrefAlign);
2628 }
2629 }
2630// If this is a memcpy (or similar) then we may be able to improve the
2631// alignment.
2632if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2633Align DestAlign =getKnownAlignment(MI->getDest(), *DL);
2634MaybeAlign MIDestAlign =MI->getDestAlign();
2635if (!MIDestAlign || DestAlign > *MIDestAlign)
2636MI->setDestAlignment(DestAlign);
2637if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2638MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2639Align SrcAlign =getKnownAlignment(MTI->getSource(), *DL);
2640if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2641 MTI->setSourceAlignment(SrcAlign);
2642 }
2643 }
2644
2645// If we have a cold call site, try to sink addressing computation into the
2646// cold block. This interacts with our handling for loads and stores to
2647// ensure that we can fold all uses of a potential addressing computation
2648// into their uses. TODO: generalize this to work over profiling data
2649if (CI->hasFnAttr(Attribute::Cold) &&
2650 !llvm::shouldOptimizeForSize(BB, PSI,BFI.get()))
2651for (auto &Arg : CI->args()) {
2652if (!Arg->getType()->isPointerTy())
2653continue;
2654unsigned AS = Arg->getType()->getPointerAddressSpace();
2655if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2656returntrue;
2657 }
2658
2659IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2660if (II) {
2661switch (II->getIntrinsicID()) {
2662default:
2663break;
2664case Intrinsic::assume:
2665llvm_unreachable("llvm.assume should have been removed already");
2666case Intrinsic::allow_runtime_check:
2667case Intrinsic::allow_ubsan_check:
2668case Intrinsic::experimental_widenable_condition: {
2669// Give up on future widening opportunities so that we can fold away dead
2670// paths and merge blocks before going into block-local instruction
2671// selection.
2672if (II->use_empty()) {
2673II->eraseFromParent();
2674returntrue;
2675 }
2676Constant *RetVal =ConstantInt::getTrue(II->getContext());
2677 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2678replaceAndRecursivelySimplify(CI, RetVal, TLInfo,nullptr);
2679 });
2680returntrue;
2681 }
2682case Intrinsic::objectsize:
2683llvm_unreachable("llvm.objectsize.* should have been lowered already");
2684case Intrinsic::is_constant:
2685llvm_unreachable("llvm.is.constant.* should have been lowered already");
2686case Intrinsic::aarch64_stlxr:
2687case Intrinsic::aarch64_stxr: {
2688ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2689if (!ExtVal || !ExtVal->hasOneUse() ||
2690 ExtVal->getParent() == CI->getParent())
2691returnfalse;
2692// Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2693 ExtVal->moveBefore(CI->getIterator());
2694// Mark this instruction as "inserted by CGP", so that other
2695// optimizations don't touch it.
2696 InsertedInsts.insert(ExtVal);
2697returntrue;
2698 }
2699
2700case Intrinsic::launder_invariant_group:
2701case Intrinsic::strip_invariant_group: {
2702Value *ArgVal =II->getArgOperand(0);
2703auto it = LargeOffsetGEPMap.find(II);
2704if (it != LargeOffsetGEPMap.end()) {
2705// Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2706// Make sure not to have to deal with iterator invalidation
2707// after possibly adding ArgVal to LargeOffsetGEPMap.
2708auto GEPs = std::move(it->second);
2709 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2710 LargeOffsetGEPMap.erase(II);
2711 }
2712
2713replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2714II->eraseFromParent();
2715returntrue;
2716 }
2717case Intrinsic::cttz:
2718case Intrinsic::ctlz:
2719// If counting zeros is expensive, try to avoid it.
2720returndespeculateCountZeros(II, *LI, TLI,DL, ModifiedDT, FreshBBs,
2721 IsHugeFunc);
2722case Intrinsic::fshl:
2723case Intrinsic::fshr:
2724return optimizeFunnelShift(II);
2725case Intrinsic::dbg_assign:
2726case Intrinsic::dbg_value:
2727return fixupDbgValue(II);
2728case Intrinsic::masked_gather:
2729return optimizeGatherScatterInst(II,II->getArgOperand(0));
2730case Intrinsic::masked_scatter:
2731return optimizeGatherScatterInst(II,II->getArgOperand(1));
2732 }
2733
2734SmallVector<Value *, 2> PtrOps;
2735Type *AccessTy;
2736if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2737while (!PtrOps.empty()) {
2738Value *PtrVal = PtrOps.pop_back_val();
2739unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2740if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2741returntrue;
2742 }
2743 }
2744
2745// From here on out we're working with named functions.
2746auto *Callee = CI->getCalledFunction();
2747if (!Callee)
2748returnfalse;
2749
2750// Lower all default uses of _chk calls. This is very similar
2751// to what InstCombineCalls does, but here we are only lowering calls
2752// to fortified library functions (e.g. __memcpy_chk) that have the default
2753// "don't know" as the objectsize. Anything else should be left alone.
2754FortifiedLibCallSimplifier Simplifier(TLInfo,true);
2755IRBuilder<> Builder(CI);
2756if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2757replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2758 CI->eraseFromParent();
2759returntrue;
2760 }
2761
// SCCP may have propagated, among other things, C++ static variables across
// calls. If this happens to be the case, we may want to undo it in order to
// avoid redundant pointer computation of the constant, as the method
// returning the constant needs to be executed anyway.
2766auto GetUniformReturnValue = [](constFunction *F) ->GlobalVariable * {
2767if (!F->getReturnType()->isPointerTy())
2768returnnullptr;
2769
2770GlobalVariable *UniformValue =nullptr;
2771for (auto &BB : *F) {
2772if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2773if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2774if (!UniformValue)
2775 UniformValue =V;
2776elseif (V != UniformValue)
2777returnnullptr;
2778 }else {
2779returnnullptr;
2780 }
2781 }
2782 }
2783
2784return UniformValue;
2785 };
2786
2787if (Callee->hasExactDefinition()) {
2788if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2789bool MadeChange =false;
2790for (Use &U :make_early_inc_range(RV->uses())) {
2791auto *I = dyn_cast<Instruction>(U.getUser());
2792if (!I ||I->getParent() != CI->getParent()) {
2793// Limit to the same basic block to avoid extending the call-site live
2794// range, which otherwise could increase register pressure.
2795continue;
2796 }
2797if (CI->comesBefore(I)) {
2798U.set(CI);
2799 MadeChange =true;
2800 }
2801 }
2802
2803return MadeChange;
2804 }
2805 }
2806
2807returnfalse;
2808}
2809
2810staticboolisIntrinsicOrLFToBeTailCalled(constTargetLibraryInfo *TLInfo,
2811constCallInst *CI) {
2812assert(CI && CI->use_empty());
2813
2814if (constauto *II = dyn_cast<IntrinsicInst>(CI))
2815switch (II->getIntrinsicID()) {
2816case Intrinsic::memset:
2817case Intrinsic::memcpy:
2818case Intrinsic::memmove:
2819returntrue;
2820default:
2821returnfalse;
2822 }
2823
2824LibFunc LF;
2825Function *Callee = CI->getCalledFunction();
2826if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2827switch (LF) {
2828case LibFunc_strcpy:
2829case LibFunc_strncpy:
2830case LibFunc_strcat:
2831case LibFunc_strncat:
2832returntrue;
2833default:
2834returnfalse;
2835 }
2836
2837returnfalse;
2838}
2839
/// Look for opportunities to duplicate return instructions into predecessor
/// blocks to enable tail call optimizations. The case it is currently looking
/// for is the following one. Known intrinsics or library functions that may be
/// tail called are taken into account as well.
2844/// @code
2845/// bb0:
2846/// %tmp0 = tail call i32 @f0()
2847/// br label %return
2848/// bb1:
2849/// %tmp1 = tail call i32 @f1()
2850/// br label %return
2851/// bb2:
2852/// %tmp2 = tail call i32 @f2()
2853/// br label %return
2854/// return:
2855/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2856/// ret i32 %retval
2857/// @endcode
2858///
2859/// =>
2860///
2861/// @code
2862/// bb0:
2863/// %tmp0 = tail call i32 @f0()
2864/// ret i32 %tmp0
2865/// bb1:
2866/// %tmp1 = tail call i32 @f1()
2867/// ret i32 %tmp1
2868/// bb2:
2869/// %tmp2 = tail call i32 @f2()
2870/// ret i32 %tmp2
2871/// @endcode
2872bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2873 ModifyDT &ModifiedDT) {
2874if (!BB->getTerminator())
2875returnfalse;
2876
2877ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2878if (!RetI)
2879returnfalse;
2880
2881assert(LI->getLoopFor(BB) ==nullptr &&"A return block cannot be in a loop");
2882
2883PHINode *PN =nullptr;
2884ExtractValueInst *EVI =nullptr;
2885BitCastInst *BCI =nullptr;
2886Value *V = RetI->getReturnValue();
2887if (V) {
2888 BCI = dyn_cast<BitCastInst>(V);
2889if (BCI)
2890V = BCI->getOperand(0);
2891
2892 EVI = dyn_cast<ExtractValueInst>(V);
2893if (EVI) {
2894V = EVI->getOperand(0);
2895if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2896returnfalse;
2897 }
2898
2899 PN = dyn_cast<PHINode>(V);
2900 }
2901
2902if (PN && PN->getParent() != BB)
2903returnfalse;
2904
2905auto isLifetimeEndOrBitCastFor = [](constInstruction *Inst) {
2906constBitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2907if (BC && BC->hasOneUse())
2908 Inst = BC->user_back();
2909
2910if (constIntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2911returnII->getIntrinsicID() == Intrinsic::lifetime_end;
2912returnfalse;
2913 };
2914
2915SmallVector<const IntrinsicInst *, 4> FakeUses;
2916
2917auto isFakeUse = [&FakeUses](constInstruction *Inst) {
2918if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2919II &&II->getIntrinsicID() == Intrinsic::fake_use) {
// Record the instruction so it can be preserved when the exit block is
// removed. Do not preserve fake uses that use the result of a PHI node,
// since we would not know which incoming value to use for each copy.
// FIXME: If we do want to copy the fake use into the return blocks, we
// have to figure out which of the PHI node operands to use for each
// copy.
2927if (!isa<PHINode>(II->getOperand(0))) {
2928 FakeUses.push_back(II);
2929 }
2930returntrue;
2931 }
2932
2933returnfalse;
2934 };
2935
2936// Make sure there are no instructions between the first instruction
2937// and return.
2938BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
2939// Skip over debug and the bitcast.
2940while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
2941 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(&*BI) ||
2942 isFakeUse(&*BI))
2943 BI = std::next(BI);
2944if (&*BI != RetI)
2945returnfalse;
2946
2947 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2948 /// call.
2949constFunction *F = BB->getParent();
2950SmallVector<BasicBlock *, 4> TailCallBBs;
2951// Record the call instructions so we can insert any fake uses
2952// that need to be preserved before them.
2953SmallVector<CallInst *, 4> CallInsts;
2954if (PN) {
2955for (unsignedI = 0, E = PN->getNumIncomingValues();I != E; ++I) {
2956// Look through bitcasts.
2957Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2958CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2959BasicBlock *PredBB = PN->getIncomingBlock(I);
2960// Make sure the phi value is indeed produced by the tail call.
2961if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2962 TLI->mayBeEmittedAsTailCall(CI) &&
2963attributesPermitTailCall(F, CI, RetI, *TLI)) {
2964 TailCallBBs.push_back(PredBB);
2965 CallInsts.push_back(CI);
2966 }else {
2967// Consider the cases in which the phi value is indirectly produced by
2968// the tail call, for example when encountering memset(), memmove(),
2969// strcpy(), whose return value may have been optimized out. In such
2970// cases, the value needs to be the first function argument.
2971//
2972// bb0:
2973// tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2974// br label %return
2975// return:
2976// %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2977if (PredBB && PredBB->getSingleSuccessor() == BB)
2978 CI = dyn_cast_or_null<CallInst>(
2979 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2980
2981if (CI && CI->use_empty() &&
2982isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2983 IncomingVal == CI->getArgOperand(0) &&
2984 TLI->mayBeEmittedAsTailCall(CI) &&
2985attributesPermitTailCall(F, CI, RetI, *TLI)) {
2986 TailCallBBs.push_back(PredBB);
2987 CallInsts.push_back(CI);
2988 }
2989 }
2990 }
2991 }else {
2992SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2993for (BasicBlock *Pred :predecessors(BB)) {
2994if (!VisitedBBs.insert(Pred).second)
2995continue;
2996if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2997CallInst *CI = dyn_cast<CallInst>(I);
2998if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2999attributesPermitTailCall(F, CI, RetI, *TLI)) {
3000// Either we return void or the return value must be the first
3001// argument of a known intrinsic or library function.
3002if (!V || isa<UndefValue>(V) ||
3003 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3004 V == CI->getArgOperand(0))) {
3005 TailCallBBs.push_back(Pred);
3006 CallInsts.push_back(CI);
3007 }
3008 }
3009 }
3010 }
3011 }
3012
3013bool Changed =false;
3014for (autoconst &TailCallBB : TailCallBBs) {
3015// Make sure the call instruction is followed by an unconditional branch to
3016// the return block.
3017BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3018if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3019continue;
3020
3021// Duplicate the return into TailCallBB.
3022 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3023assert(!VerifyBFIUpdates ||
3024BFI->getBlockFreq(BB) >=BFI->getBlockFreq(TailCallBB));
3025BFI->setBlockFreq(BB,
3026 (BFI->getBlockFreq(BB) -BFI->getBlockFreq(TailCallBB)));
3027 ModifiedDT = ModifyDT::ModifyBBDT;
3028 Changed =true;
3029 ++NumRetsDup;
3030 }
3031
3032// If we eliminated all predecessors of the block, delete the block now.
3033if (Changed && !BB->hasAddressTaken() &&pred_empty(BB)) {
3034// Copy the fake uses found in the original return block to all blocks
3035// that contain tail calls.
3036for (auto *CI : CallInsts) {
3037for (autoconst *FakeUse : FakeUses) {
3038auto *ClonedInst = FakeUse->clone();
3039 ClonedInst->insertBefore(CI->getIterator());
3040 }
3041 }
3042 BB->eraseFromParent();
3043 }
3044
3045return Changed;
3046}
3047
3048//===----------------------------------------------------------------------===//
3049// Memory Optimization
3050//===----------------------------------------------------------------------===//
3051
3052namespace{
3053
/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
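/// For example (illustrative), an address computed as 'p + 4*i + 16' could be
/// represented with BaseReg = %p, ScaledReg = %i, Scale = 4 and BaseOffs = 16.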
3056structExtAddrMode :publicTargetLowering::AddrMode {
3057Value *BaseReg =nullptr;
3058Value *ScaledReg =nullptr;
3059Value *OriginalValue =nullptr;
3060bool InBounds =true;
3061
3062enum FieldName {
3063 NoField = 0x00,
3064 BaseRegField = 0x01,
3065 BaseGVField = 0x02,
3066 BaseOffsField = 0x04,
3067 ScaledRegField = 0x08,
3068 ScaleField = 0x10,
3069 MultipleFields = 0xff
3070 };
3071
3072ExtAddrMode() =default;
3073
3074voidprint(raw_ostream &OS)const;
3075voiddump()const;
3076
3077 FieldNamecompare(constExtAddrMode &other) {
    // First check that the types are the same on each field, as differing types
    // are something we can't cope with later on.
3080if (BaseReg && other.BaseReg &&
3081BaseReg->getType() != other.BaseReg->getType())
3082return MultipleFields;
3083if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3084return MultipleFields;
3085if (ScaledReg && other.ScaledReg &&
3086ScaledReg->getType() != other.ScaledReg->getType())
3087return MultipleFields;
3088
3089// Conservatively reject 'inbounds' mismatches.
3090if (InBounds != other.InBounds)
3091return MultipleFields;
3092
3093// Check each field to see if it differs.
3094unsignedResult = NoField;
3095if (BaseReg != other.BaseReg)
3096Result |= BaseRegField;
3097if (BaseGV != other.BaseGV)
3098Result |= BaseGVField;
3099if (BaseOffs != other.BaseOffs)
3100Result |= BaseOffsField;
3101if (ScaledReg != other.ScaledReg)
3102Result |= ScaledRegField;
3103// Don't count 0 as being a different scale, because that actually means
3104// unscaled (which will already be counted by having no ScaledReg).
3105if (Scale && other.Scale &&Scale != other.Scale)
3106Result |= ScaleField;
3107
3108if (llvm::popcount(Result) > 1)
3109return MultipleFields;
3110else
3111returnstatic_cast<FieldName>(Result);
3112 }
3113
3114// An AddrMode is trivial if it involves no calculation i.e. it is just a base
3115// with no offset.
3116bool isTrivial() {
3117// An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3118// trivial if at most one of these terms is nonzero, except that BaseGV and
3119// BaseReg both being zero actually means a null pointer value, which we
3120// consider to be 'non-zero' here.
3121return !BaseOffs && !Scale && !(BaseGV &&BaseReg);
3122 }
3123
3124Value *GetFieldAsValue(FieldNameField,Type *IntPtrTy) {
3125switch (Field) {
3126default:
3127returnnullptr;
3128case BaseRegField:
3129returnBaseReg;
3130case BaseGVField:
3131return BaseGV;
3132case ScaledRegField:
3133returnScaledReg;
3134case BaseOffsField:
3135return ConstantInt::get(IntPtrTy, BaseOffs);
3136 }
3137 }
3138
3139void SetCombinedField(FieldNameField,Value *V,
3140constSmallVectorImpl<ExtAddrMode> &AddrModes) {
3141switch (Field) {
3142default:
3143llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3144break;
3145case ExtAddrMode::BaseRegField:
3146BaseReg =V;
3147break;
3148case ExtAddrMode::BaseGVField:
3149// A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3150// in the BaseReg field.
3151assert(BaseReg ==nullptr);
3152BaseReg =V;
3153 BaseGV =nullptr;
3154break;
3155case ExtAddrMode::ScaledRegField:
3156ScaledReg =V;
3157// If we have a mix of scaled and unscaled addrmodes then we want scale
3158// to be the scale and not zero.
3159if (!Scale)
3160for (constExtAddrMode &AM : AddrModes)
3161if (AM.Scale) {
3162Scale = AM.Scale;
3163break;
3164 }
3165break;
3166case ExtAddrMode::BaseOffsField:
3167// The offset is no longer a constant, so it goes in ScaledReg with a
3168// scale of 1.
3169assert(ScaledReg ==nullptr);
3170ScaledReg =V;
3171Scale = 1;
3172 BaseOffs = 0;
3173break;
3174 }
3175 }
3176};
3177
3178#ifndef NDEBUG
3179staticinlineraw_ostream &operator<<(raw_ostream &OS,constExtAddrMode &AM) {
3180 AM.print(OS);
3181returnOS;
3182}
3183#endif
3184
3185#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3186void ExtAddrMode::print(raw_ostream &OS) const{
3187bool NeedPlus =false;
3188OS <<"[";
3189if (InBounds)
3190OS <<"inbounds ";
3191if (BaseGV) {
3192OS <<"GV:";
3193 BaseGV->printAsOperand(OS,/*PrintType=*/false);
3194 NeedPlus =true;
3195 }
3196
3197if (BaseOffs) {
3198OS << (NeedPlus ?" + " :"") << BaseOffs;
3199 NeedPlus =true;
3200 }
3201
3202if (BaseReg) {
3203OS << (NeedPlus ?" + " :"") <<"Base:";
3204BaseReg->printAsOperand(OS,/*PrintType=*/false);
3205 NeedPlus =true;
3206 }
3207if (Scale) {
3208OS << (NeedPlus ?" + " :"") <<Scale <<"*";
3209ScaledReg->printAsOperand(OS,/*PrintType=*/false);
3210 }
3211
3212OS <<']';
3213}
3214
3215LLVM_DUMP_METHODvoid ExtAddrMode::dump() const{
3216print(dbgs());
3217dbgs() <<'\n';
3218}
3219#endif
3220
3221}// end anonymous namespace
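// For illustration (hypothetical values, not from this pass): an access such
// as
//   %addr = getelementptr inbounds i32, ptr %base, i64 %idx
//   %val  = load i32, ptr %addr
// could be described by an ExtAddrMode with BaseReg = %base, ScaledReg = %idx,
// Scale = 4, BaseOffs = 0 and InBounds = true, which print() would render as
// [inbounds Base:%base + 4*%idx].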
3222
3223namespace{
3224
3225/// This class provides transaction based operation on the IR.
3226/// Every change made through this class is recorded in the internal state and
3227/// can be undone (rollback) until commit is called.
3228/// CGP does not check if instructions could be speculatively executed when
3229/// moved. Preserving the original location would pessimize the debugging
3230/// experience, as well as negatively impact the quality of sample PGO.
3231classTypePromotionTransaction {
3232 /// This represents the common interface of the individual transaction.
3233 /// Each class implements the logic for doing one specific modification on
3234 /// the IR via the TypePromotionTransaction.
3235classTypePromotionAction {
3236protected:
3237 /// The Instruction modified.
3238Instruction *Inst;
3239
3240public:
3241 /// Constructor of the action.
3242 /// The constructor performs the related action on the IR.
3243 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3244
3245virtual ~TypePromotionAction() =default;
3246
3247 /// Undo the modification done by this action.
3248 /// When this method is called, the IR must be in the same state as it was
3249 /// before this action was applied.
3250 /// \pre Undoing the action works if and only if the IR is in the exact same
3251 /// state as it was directly after this action was applied.
3252virtualvoid undo() = 0;
3253
3254 /// Advocate every change made by this action.
3255 /// When the results on the IR of the action are to be kept, it is important
3256 /// to call this function, otherwise hidden information may be kept forever.
3257virtualvoid commit() {
3258// Nothing to be done, this action is not doing anything.
3259 }
3260 };
3261
3262 /// Utility to remember the position of an instruction.
3263classInsertionHandler {
3264 /// Position of an instruction.
3265 /// Either an instruction:
3266 /// - Is the first in a basic block: BB is used.
3267 /// - Has a previous instruction: PrevInst is used.
3268struct{
3269BasicBlock::iterator PrevInst;
3270BasicBlock *BB;
3271 } Point;
3272 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3273
3274 /// Remember whether or not the instruction had a previous instruction.
3275bool HasPrevInstruction;
3276
3277public:
3278 /// Record the position of \p Inst.
3279 InsertionHandler(Instruction *Inst) {
3280 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3281BasicBlock *BB = Inst->getParent();
3282
3283// Record where we would have to re-insert the instruction in the sequence
3284// of DbgRecords, if we ended up reinserting.
3285if (BB->IsNewDbgInfoFormat)
3286 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3287
3288if (HasPrevInstruction) {
3289 Point.PrevInst = std::prev(Inst->getIterator());
3290 }else {
3291 Point.BB = BB;
3292 }
3293 }
3294
3295 /// Insert \p Inst at the recorded position.
3296void insert(Instruction *Inst) {
3297if (HasPrevInstruction) {
3298if (Inst->getParent())
3299 Inst->removeFromParent();
3300 Inst->insertAfter(Point.PrevInst);
3301 }else {
3302BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3303if (Inst->getParent())
3304 Inst->moveBefore(*Point.BB, Position);
3305else
3306 Inst->insertBefore(*Point.BB, Position);
3307 }
3308
3309 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3310 }
3311 };
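  // For illustration (hypothetical instructions): if %b is defined right
  // after %a, InsertionHandler(%b) records PrevInst = %a, and insert(%b)
  // later puts %b back immediately after %a. If %b was the first instruction
  // of its block, the block itself is recorded and %b is re-inserted at the
  // block's first insertion point.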
3312
3313 /// Move an instruction before another.
3314classInstructionMoveBefore :public TypePromotionAction {
3315 /// Original position of the instruction.
3316 InsertionHandler Position;
3317
3318public:
3319 /// Move \p Inst before \p Before.
3320 InstructionMoveBefore(Instruction *Inst,BasicBlock::iteratorBefore)
3321 : TypePromotionAction(Inst), Position(Inst) {
3322LLVM_DEBUG(dbgs() <<"Do: move: " << *Inst <<"\nbefore: " << *Before
3323 <<"\n");
3324 Inst->moveBefore(Before);
3325 }
3326
3327 /// Move the instruction back to its original position.
3328void undo() override{
3329LLVM_DEBUG(dbgs() <<"Undo: moveBefore: " << *Inst <<"\n");
3330 Position.insert(Inst);
3331 }
3332 };
3333
3334 /// Set the operand of an instruction with a new value.
3335classOperandSetter :public TypePromotionAction {
3336 /// Original operand of the instruction.
3337Value *Origin;
3338
3339 /// Index of the modified instruction.
3340unsignedIdx;
3341
3342public:
3343 /// Set \p Idx operand of \p Inst with \p NewVal.
3344 OperandSetter(Instruction *Inst,unsignedIdx,Value *NewVal)
3345 : TypePromotionAction(Inst),Idx(Idx) {
3346LLVM_DEBUG(dbgs() <<"Do: setOperand: " <<Idx <<"\n"
3347 <<"for:" << *Inst <<"\n"
3348 <<"with:" << *NewVal <<"\n");
3349 Origin = Inst->getOperand(Idx);
3350 Inst->setOperand(Idx, NewVal);
3351 }
3352
3353 /// Restore the original value of the instruction.
3354void undo() override{
3355LLVM_DEBUG(dbgs() <<"Undo: setOperand:" <<Idx <<"\n"
3356 <<"for: " << *Inst <<"\n"
3357 <<"with: " << *Origin <<"\n");
3358 Inst->setOperand(Idx, Origin);
3359 }
3360 };
3361
3362 /// Hide the operands of an instruction.
3363 /// Do as if this instruction was not using any of its operands.
3364classOperandsHider :public TypePromotionAction {
3365 /// The list of original operands.
3366SmallVector<Value *, 4> OriginalValues;
3367
3368public:
3369 /// Remove \p Inst from the uses of the operands of \p Inst.
3370 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3371LLVM_DEBUG(dbgs() <<"Do: OperandsHider: " << *Inst <<"\n");
3372unsigned NumOpnds = Inst->getNumOperands();
3373 OriginalValues.reserve(NumOpnds);
3374for (unsigned It = 0; It < NumOpnds; ++It) {
3375// Save the current operand.
3376Value *Val = Inst->getOperand(It);
3377 OriginalValues.push_back(Val);
3378// Set a dummy one.
3379// We could use OperandSetter here, but that would imply an overhead
3380// that we are not willing to pay.
3381 Inst->setOperand(It,PoisonValue::get(Val->getType()));
3382 }
3383 }
3384
3385 /// Restore the original list of uses.
3386void undo() override{
3387LLVM_DEBUG(dbgs() <<"Undo: OperandsHider: " << *Inst <<"\n");
3388for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3389 Inst->setOperand(It, OriginalValues[It]);
3390 }
3391 };
3392
3393 /// Build a truncate instruction.
3394classTruncBuilder :public TypePromotionAction {
3395Value *Val;
3396
3397public:
3398 /// Build a truncate instruction of \p Opnd producing a \p Ty
3399 /// result.
3400 /// trunc Opnd to Ty.
3401 TruncBuilder(Instruction *Opnd,Type *Ty) : TypePromotionAction(Opnd) {
3402IRBuilder<> Builder(Opnd);
3403 Builder.SetCurrentDebugLocation(DebugLoc());
3404 Val = Builder.CreateTrunc(Opnd, Ty,"promoted");
3405LLVM_DEBUG(dbgs() <<"Do: TruncBuilder: " << *Val <<"\n");
3406 }
3407
3408 /// Get the built value.
3409Value *getBuiltValue() {return Val; }
3410
3411 /// Remove the built instruction.
3412void undo() override{
3413LLVM_DEBUG(dbgs() <<"Undo: TruncBuilder: " << *Val <<"\n");
3414if (Instruction *IVal = dyn_cast<Instruction>(Val))
3415 IVal->eraseFromParent();
3416 }
3417 };
3418
3419 /// Build a sign extension instruction.
3420classSExtBuilder :public TypePromotionAction {
3421Value *Val;
3422
3423public:
3424 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3425 /// result.
3426 /// sext Opnd to Ty.
3427 SExtBuilder(Instruction *InsertPt,Value *Opnd,Type *Ty)
3428 : TypePromotionAction(InsertPt) {
3429IRBuilder<> Builder(InsertPt);
3430 Val = Builder.CreateSExt(Opnd, Ty,"promoted");
3431LLVM_DEBUG(dbgs() <<"Do: SExtBuilder: " << *Val <<"\n");
3432 }
3433
3434 /// Get the built value.
3435Value *getBuiltValue() {return Val; }
3436
3437 /// Remove the built instruction.
3438void undo() override{
3439LLVM_DEBUG(dbgs() <<"Undo: SExtBuilder: " << *Val <<"\n");
3440if (Instruction *IVal = dyn_cast<Instruction>(Val))
3441 IVal->eraseFromParent();
3442 }
3443 };
3444
3445 /// Build a zero extension instruction.
3446classZExtBuilder :public TypePromotionAction {
3447Value *Val;
3448
3449public:
3450 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3451 /// result.
3452 /// zext Opnd to Ty.
3453 ZExtBuilder(Instruction *InsertPt,Value *Opnd,Type *Ty)
3454 : TypePromotionAction(InsertPt) {
3455IRBuilder<> Builder(InsertPt);
3456 Builder.SetCurrentDebugLocation(DebugLoc());
3457 Val = Builder.CreateZExt(Opnd, Ty,"promoted");
3458LLVM_DEBUG(dbgs() <<"Do: ZExtBuilder: " << *Val <<"\n");
3459 }
3460
3461 /// Get the built value.
3462Value *getBuiltValue() {return Val; }
3463
3464 /// Remove the built instruction.
3465void undo() override{
3466LLVM_DEBUG(dbgs() <<"Undo: ZExtBuilder: " << *Val <<"\n");
3467if (Instruction *IVal = dyn_cast<Instruction>(Val))
3468 IVal->eraseFromParent();
3469 }
3470 };
3471
3472 /// Mutate an instruction to another type.
3473classTypeMutator :public TypePromotionAction {
3474 /// Record the original type.
3475Type *OrigTy;
3476
3477public:
3478 /// Mutate the type of \p Inst into \p NewTy.
3479 TypeMutator(Instruction *Inst,Type *NewTy)
3480 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3481LLVM_DEBUG(dbgs() <<"Do: MutateType: " << *Inst <<" with " << *NewTy
3482 <<"\n");
3483 Inst->mutateType(NewTy);
3484 }
3485
3486 /// Mutate the instruction back to its original type.
3487void undo() override{
3488LLVM_DEBUG(dbgs() <<"Undo: MutateType: " << *Inst <<" with " << *OrigTy
3489 <<"\n");
3490 Inst->mutateType(OrigTy);
3491 }
3492 };
3493
3494 /// Replace the uses of an instruction by another instruction.
3495classUsesReplacer :public TypePromotionAction {
3496 /// Helper structure to keep track of the replaced uses.
3497structInstructionAndIdx {
3498 /// The instruction using the instruction.
3499Instruction *Inst;
3500
3501 /// The index where this instruction is used for Inst.
3502unsignedIdx;
3503
3504 InstructionAndIdx(Instruction *Inst,unsignedIdx)
3505 : Inst(Inst),Idx(Idx) {}
3506 };
3507
3508 /// Keep track of the original uses (pair Instruction, Index).
3509SmallVector<InstructionAndIdx, 4> OriginalUses;
3510 /// Keep track of the debug users.
3511SmallVector<DbgValueInst *, 1> DbgValues;
3512 /// And non-instruction debug-users too.
3513SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3514
3515 /// Keep track of the new value so that we can undo it by replacing
3516 /// instances of the new value with the original value.
3517Value *New;
3518
3519usinguse_iterator =SmallVectorImpl<InstructionAndIdx>::iterator;
3520
3521public:
3522 /// Replace all the use of \p Inst by \p New.
3523 UsesReplacer(Instruction *Inst,Value *New)
3524 : TypePromotionAction(Inst),New(New) {
3525LLVM_DEBUG(dbgs() <<"Do: UsersReplacer: " << *Inst <<" with " << *New
3526 <<"\n");
3527// Record the original uses.
3528for (Use &U : Inst->uses()) {
3529Instruction *UserI = cast<Instruction>(U.getUser());
3530 OriginalUses.push_back(InstructionAndIdx(UserI,U.getOperandNo()));
3531 }
3532// Record the debug uses separately. They are not in the instruction's
3533// use list, but they are replaced by RAUW.
3534findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3535
3536// Now, we can replace the uses.
3537 Inst->replaceAllUsesWith(New);
3538 }
3539
3540 /// Reassign the original uses of Inst to Inst.
3541void undo() override{
3542LLVM_DEBUG(dbgs() <<"Undo: UsersReplacer: " << *Inst <<"\n");
3543for (InstructionAndIdx &Use : OriginalUses)
3544Use.Inst->setOperand(Use.Idx, Inst);
3545// RAUW has replaced all original uses with references to the new value,
3546// including the debug uses. Since we are undoing the replacements,
3547// the original debug uses must also be reinstated to maintain the
3548// correctness and utility of debug value instructions.
3549for (auto *DVI : DbgValues)
3550 DVI->replaceVariableLocationOp(New, Inst);
3551// Similar story with DbgVariableRecords, the non-instruction
3552// representation of dbg.values.
3553for (DbgVariableRecord *DVR : DbgVariableRecords)
3554 DVR->replaceVariableLocationOp(New, Inst);
3555 }
3556 };
3557
3558 /// Remove an instruction from the IR.
3559classInstructionRemover :public TypePromotionAction {
3560 /// Original position of the instruction.
3561 InsertionHandler Inserter;
3562
    /// Helper structure to hide all the links to the instruction. In other
    /// words, this helps to act as if the instruction was removed.
3565 OperandsHider Hider;
3566
3567 /// Keep track of the uses replaced, if any.
3568 UsesReplacer *Replacer =nullptr;
3569
3570 /// Keep track of instructions removed.
3571 SetOfInstrs &RemovedInsts;
3572
3573public:
3574 /// Remove all reference of \p Inst and optionally replace all its
3575 /// uses with New.
3576 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3577 /// \pre If !Inst->use_empty(), then New != nullptr
3578 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3579Value *New =nullptr)
3580 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3581 RemovedInsts(RemovedInsts) {
3582if (New)
3583 Replacer =new UsesReplacer(Inst, New);
3584LLVM_DEBUG(dbgs() <<"Do: InstructionRemover: " << *Inst <<"\n");
3585 RemovedInsts.insert(Inst);
3586 /// The instructions removed here will be freed after completing
3587 /// optimizeBlock() for all blocks as we need to keep track of the
3588 /// removed instructions during promotion.
3589 Inst->removeFromParent();
3590 }
3591
3592 ~InstructionRemover() override{delete Replacer; }
3593
3594 InstructionRemover &operator=(const InstructionRemover &other) =delete;
3595 InstructionRemover(const InstructionRemover &other) =delete;
3596
    /// Resurrect the instruction and reassign it to the proper uses if a
    /// new value was provided when building this action.
3599void undo() override{
3600LLVM_DEBUG(dbgs() <<"Undo: InstructionRemover: " << *Inst <<"\n");
3601 Inserter.insert(Inst);
3602if (Replacer)
3603 Replacer->undo();
3604 Hider.undo();
3605 RemovedInsts.erase(Inst);
3606 }
3607 };
3608
3609public:
3610 /// Restoration point.
3611 /// The restoration point is a pointer to an action instead of an iterator
3612 /// because the iterator may be invalidated but not the pointer.
3613usingConstRestorationPt =const TypePromotionAction *;
3614
3615 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3616 : RemovedInsts(RemovedInsts) {}
3617
  /// Advocate every change made in that transaction. Return true if any change
  /// happened.
3620bool commit();
3621
3622 /// Undo all the changes made after the given point.
3623void rollback(ConstRestorationPt Point);
3624
3625 /// Get the current restoration point.
3626 ConstRestorationPt getRestorationPoint()const;
3627
3628 /// \name API for IR modification with state keeping to support rollback.
3629 /// @{
3630 /// Same as Instruction::setOperand.
3631void setOperand(Instruction *Inst,unsignedIdx,Value *NewVal);
3632
3633 /// Same as Instruction::eraseFromParent.
3634voideraseInstruction(Instruction *Inst,Value *NewVal =nullptr);
3635
3636 /// Same as Value::replaceAllUsesWith.
3637voidreplaceAllUsesWith(Instruction *Inst,Value *New);
3638
3639 /// Same as Value::mutateType.
3640void mutateType(Instruction *Inst,Type *NewTy);
3641
3642 /// Same as IRBuilder::createTrunc.
3643Value *createTrunc(Instruction *Opnd,Type *Ty);
3644
3645 /// Same as IRBuilder::createSExt.
3646Value *createSExt(Instruction *Inst,Value *Opnd,Type *Ty);
3647
3648 /// Same as IRBuilder::createZExt.
3649Value *createZExt(Instruction *Inst,Value *Opnd,Type *Ty);
3650
3651private:
3652 /// The ordered list of actions made so far.
3653SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3654
3655usingCommitPt =
3656SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3657
3658 SetOfInstrs &RemovedInsts;
3659};
3660
3661}// end anonymous namespace
3662
3663void TypePromotionTransaction::setOperand(Instruction *Inst,unsignedIdx,
3664Value *NewVal) {
3665 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3666 Inst,Idx, NewVal));
3667}
3668
3669void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3670Value *NewVal) {
3671 Actions.push_back(
3672 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3673 Inst, RemovedInsts, NewVal));
3674}
3675
3676void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3677Value *New) {
3678 Actions.push_back(
3679 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3680}
3681
3682void TypePromotionTransaction::mutateType(Instruction *Inst,Type *NewTy) {
3683 Actions.push_back(
3684 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3685}
3686
3687Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,Type *Ty) {
3688 std::unique_ptr<TruncBuilder>Ptr(new TruncBuilder(Opnd, Ty));
3689Value *Val =Ptr->getBuiltValue();
3690 Actions.push_back(std::move(Ptr));
3691return Val;
3692}
3693
3694Value *TypePromotionTransaction::createSExt(Instruction *Inst,Value *Opnd,
3695Type *Ty) {
3696 std::unique_ptr<SExtBuilder>Ptr(new SExtBuilder(Inst, Opnd, Ty));
3697Value *Val =Ptr->getBuiltValue();
3698 Actions.push_back(std::move(Ptr));
3699return Val;
3700}
3701
3702Value *TypePromotionTransaction::createZExt(Instruction *Inst,Value *Opnd,
3703Type *Ty) {
3704 std::unique_ptr<ZExtBuilder>Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3705Value *Val =Ptr->getBuiltValue();
3706 Actions.push_back(std::move(Ptr));
3707return Val;
3708}
3709
3710TypePromotionTransaction::ConstRestorationPt
3711TypePromotionTransaction::getRestorationPoint() const{
3712return !Actions.empty() ? Actions.back().get() :nullptr;
3713}
3714
3715bool TypePromotionTransaction::commit() {
3716for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3717 Action->commit();
3718boolModified = !Actions.empty();
3719 Actions.clear();
3720returnModified;
3721}
3722
3723void TypePromotionTransaction::rollback(
3724 TypePromotionTransaction::ConstRestorationPt Point) {
3725while (!Actions.empty() && Point != Actions.back().get()) {
3726 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3727 Curr->undo();
3728 }
3729}
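// Illustrative usage sketch (hypothetical caller and values, simplified from
// how this pass drives the transaction): record a restoration point, apply
// speculative rewrites, then keep or undo them as a unit.
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   Value *Promoted = TPT.createZExt(ExtInst, NarrowOpnd, WideTy);
//   TPT.setOperand(ExtInst, 0, Promoted);
//   if (!ProfitableToKeep)
//     TPT.rollback(LastKnownGood); // undoes the setOperand and the zext
//   // Otherwise TPT.commit() keeps the changes and clears the undo log.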
3730
3731namespace{
3732
3733/// A helper class for matching addressing modes.
3734///
3735/// This encapsulates the logic for matching the target-legal addressing modes.
3736classAddressingModeMatcher {
3737SmallVectorImpl<Instruction *> &AddrModeInsts;
3738constTargetLowering &TLI;
3739constTargetRegisterInfo &TRI;
3740constDataLayout &DL;
3741constLoopInfo &LI;
3742const std::function<constDominatorTree &()> getDTFn;
3743
3744 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3745 /// the memory instruction that we're computing this address for.
3746Type *AccessTy;
3747unsigned AddrSpace;
3748Instruction *MemoryInst;
3749
3750 /// This is the addressing mode that we're building up. This is
3751 /// part of the return value of this addressing mode matching stuff.
3752ExtAddrMode &AddrMode;
3753
3754 /// The instructions inserted by other CodeGenPrepare optimizations.
3755const SetOfInstrs &InsertedInsts;
3756
3757 /// A map from the instructions to their type before promotion.
3758 InstrToOrigTy &PromotedInsts;
3759
3760 /// The ongoing transaction where every action should be registered.
3761 TypePromotionTransaction &TPT;
3762
3763// A GEP which has too large offset to be folded into the addressing mode.
3764 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3765
3766 /// This is set to true when we should not do profitability checks.
3767 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3768bool IgnoreProfitability;
3769
3770 /// True if we are optimizing for size.
3771bool OptSize =false;
3772
3773ProfileSummaryInfo *PSI;
3774BlockFrequencyInfo *BFI;
3775
3776 AddressingModeMatcher(
3777SmallVectorImpl<Instruction *> &AMI,constTargetLowering &TLI,
3778constTargetRegisterInfo &TRI,constLoopInfo &LI,
3779const std::function<constDominatorTree &()> getDTFn,Type *AT,
3780unsigned AS,Instruction *MI,ExtAddrMode &AM,
3781const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3782 TypePromotionTransaction &TPT,
3783 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3784bool OptSize,ProfileSummaryInfo *PSI,BlockFrequencyInfo *BFI)
3785 : AddrModeInsts(AMI), TLI(TLI),TRI(TRI),
3786DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3787 AccessTy(AT), AddrSpace(AS), MemoryInst(MI),AddrMode(AM),
3788 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3789 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI),BFI(BFI) {
3790 IgnoreProfitability =false;
3791 }
3792
3793public:
  /// Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy. This returns a list of involved
  /// instructions in AddrModeInsts.
  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
  /// optimizations.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  /// \p TPT The ongoing transaction where every action should be registered.
3801staticExtAddrMode
3802Match(Value *V,Type *AccessTy,unsigned AS,Instruction *MemoryInst,
3803SmallVectorImpl<Instruction *> &AddrModeInsts,
3804constTargetLowering &TLI,constLoopInfo &LI,
3805const std::function<constDominatorTree &()> getDTFn,
3806constTargetRegisterInfo &TRI,const SetOfInstrs &InsertedInsts,
3807 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3808 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3809bool OptSize,ProfileSummaryInfo *PSI,BlockFrequencyInfo *BFI) {
3810ExtAddrModeResult;
3811
3812boolSuccess = AddressingModeMatcher(AddrModeInsts, TLI,TRI, LI, getDTFn,
3813 AccessTy, AS, MemoryInst, Result,
3814 InsertedInsts, PromotedInsts, TPT,
3815 LargeOffsetGEP, OptSize, PSI, BFI)
3816 .matchAddr(V, 0);
3817 (void)Success;
3818assert(Success &&"Couldn't select *anything*?");
3819returnResult;
3820 }
3821
3822private:
3823bool matchScaledValue(Value *ScaleReg, int64_t Scale,unsignedDepth);
3824bool matchAddr(Value *Addr,unsignedDepth);
3825bool matchOperationAddr(User *AddrInst,unsigned Opcode,unsignedDepth,
3826bool *MovedAway =nullptr);
3827bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3828ExtAddrMode &AMBefore,
3829ExtAddrMode &AMAfter);
3830bool valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,Value *KnownLive2);
3831bool isPromotionProfitable(unsigned NewCost,unsigned OldCost,
3832Value *PromotedOperand)const;
3833};
3834
3835classPhiNodeSet;
3836
3837/// An iterator for PhiNodeSet.
3838classPhiNodeSetIterator {
3839 PhiNodeSet *constSet;
3840size_t CurrentIndex = 0;
3841
3842public:
3843 /// The constructor. Start should point to either a valid element, or be equal
3844 /// to the size of the underlying SmallVector of the PhiNodeSet.
3845 PhiNodeSetIterator(PhiNodeSet *const Set,size_t Start);
3846PHINode *operator*()const;
3847 PhiNodeSetIterator &operator++();
3848booloperator==(const PhiNodeSetIterator &RHS)const;
3849booloperator!=(const PhiNodeSetIterator &RHS)const;
3850};
3851
3852/// Keeps a set of PHINodes.
3853///
3854/// This is a minimal set implementation for a specific use case:
3855/// It is very fast when there are very few elements, but also provides good
3856/// performance when there are many. It is similar to SmallPtrSet, but also
3857/// provides iteration by insertion order, which is deterministic and stable
/// across runs. It is also similar to SmallSetVector, but supports removing
/// elements in O(1) time. This is achieved by not actually removing the element
/// from the underlying vector, so it comes at the cost of using more memory,
/// but that is fine, since PhiNodeSets are used as short-lived objects.
3862classPhiNodeSet {
3863friendclassPhiNodeSetIterator;
3864
3865usingMapType =SmallDenseMap<PHINode *, size_t, 32>;
3866usingiterator = PhiNodeSetIterator;
3867
3868 /// Keeps the elements in the order of their insertion in the underlying
3869 /// vector. To achieve constant time removal, it never deletes any element.
3870SmallVector<PHINode *, 32>NodeList;
3871
3872 /// Keeps the elements in the underlying set implementation. This (and not the
3873 /// NodeList defined above) is the source of truth on whether an element
3874 /// is actually in the collection.
3875 MapType NodeMap;
3876
3877 /// Points to the first valid (not deleted) element when the set is not empty
3878 /// and the value is not zero. Equals to the size of the underlying vector
3879 /// when the set is empty. When the value is 0, as in the beginning, the
3880 /// first element may or may not be valid.
3881size_t FirstValidElement = 0;
3882
3883public:
3884 /// Inserts a new element to the collection.
3885 /// \returns true if the element is actually added, i.e. was not in the
3886 /// collection before the operation.
3887bool insert(PHINode *Ptr) {
3888if (NodeMap.insert(std::make_pair(Ptr,NodeList.size())).second) {
3889NodeList.push_back(Ptr);
3890returntrue;
3891 }
3892returnfalse;
3893 }
3894
3895 /// Removes the element from the collection.
3896 /// \returns whether the element is actually removed, i.e. was in the
3897 /// collection before the operation.
3898boolerase(PHINode *Ptr) {
3899if (NodeMap.erase(Ptr)) {
3900 SkipRemovedElements(FirstValidElement);
3901returntrue;
3902 }
3903returnfalse;
3904 }
3905
3906 /// Removes all elements and clears the collection.
3907void clear() {
3908 NodeMap.clear();
3909NodeList.clear();
3910 FirstValidElement = 0;
3911 }
3912
3913 /// \returns an iterator that will iterate the elements in the order of
3914 /// insertion.
3915 iteratorbegin() {
3916if (FirstValidElement == 0)
3917 SkipRemovedElements(FirstValidElement);
3918return PhiNodeSetIterator(this, FirstValidElement);
3919 }
3920
3921 /// \returns an iterator that points to the end of the collection.
3922 iteratorend() {return PhiNodeSetIterator(this,NodeList.size()); }
3923
3924 /// Returns the number of elements in the collection.
3925size_tsize() const{return NodeMap.size(); }
3926
  /// \returns 1 if the given element is in the collection, and 0 otherwise.
3928size_tcount(PHINode *Ptr) const{return NodeMap.count(Ptr); }
3929
3930private:
3931 /// Updates the CurrentIndex so that it will point to a valid element.
3932 ///
3933 /// If the element of NodeList at CurrentIndex is valid, it does not
3934 /// change it. If there are no more valid elements, it updates CurrentIndex
3935 /// to point to the end of the NodeList.
3936void SkipRemovedElements(size_t &CurrentIndex) {
3937while (CurrentIndex <NodeList.size()) {
3938auto it = NodeMap.find(NodeList[CurrentIndex]);
3939// If the element has been deleted and added again later, NodeMap will
3940// point to a different index, so CurrentIndex will still be invalid.
3941if (it != NodeMap.end() && it->second == CurrentIndex)
3942break;
3943 ++CurrentIndex;
3944 }
3945 }
3946};
3947
3948PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set,size_t Start)
3949 :Set(Set), CurrentIndex(Start) {}
3950
3951PHINode *PhiNodeSetIterator::operator*() const{
3952assert(CurrentIndex < Set->NodeList.size() &&
3953"PhiNodeSet access out of range");
3954returnSet->NodeList[CurrentIndex];
3955}
3956
3957PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3958assert(CurrentIndex < Set->NodeList.size() &&
3959"PhiNodeSet access out of range");
3960 ++CurrentIndex;
3961Set->SkipRemovedElements(CurrentIndex);
3962return *this;
3963}
3964
3965bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const{
3966return CurrentIndex ==RHS.CurrentIndex;
3967}
3968
3969bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const{
3970return !((*this) ==RHS);
3971}
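// For illustration (hypothetical PHINode pointers A, B and C): after
//   PhiNodeSet S; S.insert(A); S.insert(B); S.insert(C); S.erase(B);
// iteration visits A and then C (insertion order, skipping the erased node),
// S.size() == 2 and S.count(B) == 0, while B still occupies a slot in the
// underlying NodeList; erase is O(1) because nothing is compacted.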
3972
/// Keep track of the simplification of Phi nodes.
/// Accept the set of all phi nodes and erase a phi node from this set
/// if it is simplified.
3976classSimplificationTracker {
3977DenseMap<Value *, Value *> Storage;
3978constSimplifyQuery &SQ;
3979// Tracks newly created Phi nodes. The elements are iterated by insertion
3980// order.
3981 PhiNodeSet AllPhiNodes;
3982// Tracks newly created Select nodes.
3983SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3984
3985public:
3986 SimplificationTracker(constSimplifyQuery &sq) : SQ(sq) {}
3987
3988Value *Get(Value *V) {
3989do {
3990auto SV = Storage.find(V);
3991if (SV == Storage.end())
3992returnV;
3993V = SV->second;
3994 }while (true);
3995 }
3996
3997Value *Simplify(Value *Val) {
3998SmallVector<Value *, 32> WorkList;
3999SmallPtrSet<Value *, 32> Visited;
4000 WorkList.push_back(Val);
4001while (!WorkList.empty()) {
4002auto *P = WorkList.pop_back_val();
4003if (!Visited.insert(P).second)
4004continue;
4005if (auto *PI = dyn_cast<Instruction>(P))
4006if (Value *V =simplifyInstruction(cast<Instruction>(PI), SQ)) {
4007for (auto *U : PI->users())
4008 WorkList.push_back(cast<Value>(U));
4009 Put(PI, V);
4010 PI->replaceAllUsesWith(V);
4011if (auto *PHI = dyn_cast<PHINode>(PI))
4012 AllPhiNodes.erase(PHI);
4013if (auto *Select = dyn_cast<SelectInst>(PI))
4014 AllSelectNodes.erase(Select);
4015 PI->eraseFromParent();
4016 }
4017 }
4018return Get(Val);
4019 }
4020
4021void Put(Value *From,Value *To) { Storage.insert({From, To}); }
4022
4023void ReplacePhi(PHINode *From,PHINode *To) {
4024Value *OldReplacement = Get(From);
4025while (OldReplacement !=From) {
4026From = To;
4027 To = dyn_cast<PHINode>(OldReplacement);
4028 OldReplacement = Get(From);
4029 }
4030assert(To && Get(To) == To &&"Replacement PHI node is already replaced.");
4031 Put(From, To);
4032From->replaceAllUsesWith(To);
4033 AllPhiNodes.erase(From);
4034From->eraseFromParent();
4035 }
4036
4037 PhiNodeSet &newPhiNodes() {return AllPhiNodes; }
4038
4039void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4040
4041void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4042
4043unsigned countNewPhiNodes() const{return AllPhiNodes.size(); }
4044
4045unsigned countNewSelectNodes() const{return AllSelectNodes.size(); }
4046
4047void destroyNewNodes(Type *CommonType) {
4048// For safe erasing, replace the uses with dummy value first.
4049auto *Dummy =PoisonValue::get(CommonType);
4050for (auto *I : AllPhiNodes) {
4051I->replaceAllUsesWith(Dummy);
4052I->eraseFromParent();
4053 }
4054 AllPhiNodes.clear();
4055for (auto *I : AllSelectNodes) {
4056I->replaceAllUsesWith(Dummy);
4057I->eraseFromParent();
4058 }
4059 AllSelectNodes.clear();
4060 }
4061};
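// For illustration (hypothetical values): if Put(%a, %b) and later Put(%b, %c)
// were recorded because %a simplified to %b and %b then simplified to %c,
// Get(%a) follows the chain in Storage and returns %c, the current
// replacement value.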
4062
4063/// A helper class for combining addressing modes.
4064classAddressingModeCombiner {
4065typedefDenseMap<Value *, Value *> FoldAddrToValueMapping;
4066typedef std::pair<PHINode *, PHINode *> PHIPair;
4067
4068private:
4069 /// The addressing modes we've collected.
4070SmallVector<ExtAddrMode, 16> AddrModes;
4071
4072 /// The field in which the AddrModes differ, when we have more than one.
4073 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4074
4075 /// Are the AddrModes that we have all just equal to their original values?
4076bool AllAddrModesTrivial =true;
4077
4078 /// Common Type for all different fields in addressing modes.
4079Type *CommonType =nullptr;
4080
4081 /// SimplifyQuery for simplifyInstruction utility.
4082constSimplifyQuery &SQ;
4083
4084 /// Original Address.
4085Value *Original;
4086
4087 /// Common value among addresses
4088Value *CommonValue =nullptr;
4089
4090public:
4091 AddressingModeCombiner(constSimplifyQuery &_SQ,Value *OriginalValue)
4092 : SQ(_SQ), Original(OriginalValue) {}
4093
4094 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4095
4096 /// Get the combined AddrMode
4097constExtAddrMode &getAddrMode() const{return AddrModes[0]; }
4098
4099 /// Add a new AddrMode if it's compatible with the AddrModes we already
4100 /// have.
4101 /// \return True iff we succeeded in doing so.
4102bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
    // Take note of whether we have any non-trivial AddrModes, as we need to
    // detect when all AddrModes are trivial, since then we would introduce a
    // phi or select which just duplicates what's already there.
4106 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4107
4108// If this is the first addrmode then everything is fine.
4109if (AddrModes.empty()) {
4110 AddrModes.emplace_back(NewAddrMode);
4111returntrue;
4112 }
4113
4114// Figure out how different this is from the other address modes, which we
4115// can do just by comparing against the first one given that we only care
4116// about the cumulative difference.
4117 ExtAddrMode::FieldName ThisDifferentField =
4118 AddrModes[0].compare(NewAddrMode);
4119if (DifferentField == ExtAddrMode::NoField)
4120 DifferentField = ThisDifferentField;
4121elseif (DifferentField != ThisDifferentField)
4122 DifferentField = ExtAddrMode::MultipleFields;
4123
4124// If NewAddrMode differs in more than one dimension we cannot handle it.
4125bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4126
4127// If Scale Field is different then we reject.
4128 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4129
    // We also must reject the case when the base offset is different and the
    // scale reg is not null: merging the different offsets would require
    // using them as the ScaledReg.
4133 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4134 !NewAddrMode.ScaledReg);
4135
    // We also must reject the case when the GV is different and a BaseReg is
    // installed, because we want to use the base reg as a merge of the GV
    // values.
4138 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4139 !NewAddrMode.HasBaseReg);
4140
    // Even if NewAddrMode is the same, we still need to collect it because the
    // original value is different. Later we will need all the original values
    // as anchors when finding the common Phi node.
4144if (CanHandle)
4145 AddrModes.emplace_back(NewAddrMode);
4146else
4147 AddrModes.clear();
4148
4149return CanHandle;
4150 }
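  // For illustration (hypothetical modes): two addressing modes that differ
  // only in the BaseReg, e.g. [Base:%p1] and [Base:%p2], can still be
  // collected and later merged through a common Phi of %p1 and %p2. Modes
  // that differ in the Scale, or in the BaseOffs while a ScaledReg is
  // present, are rejected here and the whole collection is dropped.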
4151
4152 /// Combine the addressing modes we've collected into a single
4153 /// addressing mode.
4154 /// \return True iff we successfully combined them or we only had one so
4155 /// didn't need to combine them anyway.
4156bool combineAddrModes() {
4157// If we have no AddrModes then they can't be combined.
4158if (AddrModes.size() == 0)
4159returnfalse;
4160
4161// A single AddrMode can trivially be combined.
4162if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4163returntrue;
4164
4165// If the AddrModes we collected are all just equal to the value they are
4166// derived from then combining them wouldn't do anything useful.
4167if (AllAddrModesTrivial)
4168returnfalse;
4169
4170if (!addrModeCombiningAllowed())
4171returnfalse;
4172
4173// Build a map between <original value, basic block where we saw it> to
4174// value of base register.
4175// Bail out if there is no common type.
4176 FoldAddrToValueMappingMap;
4177if (!initializeMap(Map))
4178returnfalse;
4179
4180 CommonValue = findCommon(Map);
4181if (CommonValue)
4182 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4183return CommonValue !=nullptr;
4184 }
4185
4186private:
4187 /// `CommonValue` may be a placeholder inserted by us.
4188 /// If the placeholder is not used, we should remove this dead instruction.
4189void eraseCommonValueIfDead() {
4190if (CommonValue && CommonValue->getNumUses() == 0)
4191if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4192 CommonInst->eraseFromParent();
4193 }
4194
  /// Initialize Map with anchor values. For each address seen,
  /// we record the value of the differing field seen in that address.
  /// At the same time we find a common type for the differing field, which we
  /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
  /// Return false if no common type is found.
4200bool initializeMap(FoldAddrToValueMapping &Map) {
4201// Keep track of keys where the value is null. We will need to replace it
4202// with constant null when we know the common type.
4203SmallVector<Value *, 2> NullValue;
4204Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4205for (auto &AM : AddrModes) {
4206Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4207if (DV) {
4208auto *Type = DV->getType();
4209if (CommonType && CommonType !=Type)
4210returnfalse;
4211 CommonType =Type;
4212Map[AM.OriginalValue] = DV;
4213 }else {
4214 NullValue.push_back(AM.OriginalValue);
4215 }
4216 }
4217assert(CommonType &&"At least one non-null value must be!");
4218for (auto *V : NullValue)
4219Map[V] =Constant::getNullValue(CommonType);
4220returntrue;
4221 }
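  // For illustration (using the hypothetical p1/b1 example documented below):
  // when the modes differ in the BaseReg, initializeMap records Map[p1] = b1
  // and Map[p2] = b2 and takes CommonType from their type; an address whose
  // mode has no value for the differing field is instead mapped to
  // Constant::getNullValue(CommonType).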
4222
  /// We have a mapping from value A to value B, where B was a field in the
  /// addressing mode represented by A. We also have an original value C
  /// representing the address we start with. Traversing from C through phis
  /// and selects, we ended up with the A's in the map. This utility function
  /// tries to find a value V which is a field in addressing mode C such that,
  /// traversing through phi nodes and selects, we end up in the corresponding
  /// values B in the map.
  /// The utility will create new Phis/Selects if needed.
4230// The simple example looks as follows:
4231// BB1:
4232// p1 = b1 + 40
4233// br cond BB2, BB3
4234// BB2:
4235// p2 = b2 + 40
4236// br BB3
4237// BB3:
4238// p = phi [p1, BB1], [p2, BB2]
4239// v = load p
4240// Map is
4241// p1 -> b1
4242// p2 -> b2
4243// Request is
4244// p -> ?
4245// The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4246Value *findCommon(FoldAddrToValueMapping &Map) {
    // Tracks the simplification of newly created phi nodes. We use this
    // mapping because we will add newly created Phi nodes to AddrToBase.
    // Simplification of Phi nodes is recursive, so some Phi node may
    // be simplified after we added it to AddrToBase. In reality this
    // simplification is possible only if the original phis/selects were not
    // simplified yet.
    // Using this mapping we can find the current value in AddrToBase.
4254 SimplificationTrackerST(SQ);
4255
4256// First step, DFS to create PHI nodes for all intermediate blocks.
4257// Also fill traverse order for the second step.
4258SmallVector<Value *, 32> TraverseOrder;
4259 InsertPlaceholders(Map, TraverseOrder, ST);
4260
4261// Second Step, fill new nodes by merged values and simplify if possible.
4262 FillPlaceholders(Map, TraverseOrder, ST);
4263
4264if (!AddrSinkNewSelects &&ST.countNewSelectNodes() > 0) {
4265ST.destroyNewNodes(CommonType);
4266returnnullptr;
4267 }
4268
    // Now we'd like to match the new Phi nodes to existing ones.
4270unsigned PhiNotMatchedCount = 0;
4271if (!MatchPhiSet(ST,AddrSinkNewPhis, PhiNotMatchedCount)) {
4272ST.destroyNewNodes(CommonType);
4273returnnullptr;
4274 }
4275
4276auto *Result =ST.Get(Map.find(Original)->second);
4277if (Result) {
4278 NumMemoryInstsPhiCreated +=ST.countNewPhiNodes() + PhiNotMatchedCount;
4279 NumMemoryInstsSelectCreated +=ST.countNewSelectNodes();
4280 }
4281returnResult;
4282 }
4283
4284 /// Try to match PHI node to Candidate.
4285 /// Matcher tracks the matched Phi nodes.
4286bool MatchPhiNode(PHINode *PHI,PHINode *Candidate,
4287SmallSetVector<PHIPair, 8> &Matcher,
4288 PhiNodeSet &PhiNodesToMatch) {
4289SmallVector<PHIPair, 8> WorkList;
4290 Matcher.insert({PHI, Candidate});
4291SmallSet<PHINode *, 8> MatchedPHIs;
4292 MatchedPHIs.insert(PHI);
4293 WorkList.push_back({PHI, Candidate});
4294SmallSet<PHIPair, 8> Visited;
4295while (!WorkList.empty()) {
4296auto Item = WorkList.pop_back_val();
4297if (!Visited.insert(Item).second)
4298continue;
      // We iterate over all incoming values of the Phi to compare them.
      // If the values are different, both of them are Phis, the first one is a
      // Phi we added (subject to match), and both are in the same basic block,
      // then we can match our pair if the incoming values match. So we state
      // that these values match and add the pair to the work list to verify
      // that.
4304for (auto *B : Item.first->blocks()) {
4305Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4306Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4307if (FirstValue == SecondValue)
4308continue;
4309
4310PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4311PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4312
        // If one of them is not a Phi, or the first one is not a Phi node
        // from the set we'd like to match, or the Phi nodes are from
        // different basic blocks, then we will not be able to match.
4317if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4318 FirstPhi->getParent() != SecondPhi->getParent())
4319returnfalse;
4320
4321// If we already matched them then continue.
4322if (Matcher.count({FirstPhi, SecondPhi}))
4323continue;
        // So the values are different and do not match, which means we need
        // them to match. (But we register no more than one match per PHI node,
        // so that we won't later try to replace them twice.)
4327if (MatchedPHIs.insert(FirstPhi).second)
4328 Matcher.insert({FirstPhi, SecondPhi});
        // But we must check it.
4330 WorkList.push_back({FirstPhi, SecondPhi});
4331 }
4332 }
4333returntrue;
4334 }
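  // For illustration (hypothetical IR): a newly created node such as
  //   %sunk_phi = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  // matches an existing
  //   %old = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  // because every incoming value agrees per predecessor block; the new node
  // can then be replaced by %old instead of being kept.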
4335
4336 /// For the given set of PHI nodes (in the SimplificationTracker) try
4337 /// to find their equivalents.
4338 /// Returns false if this matching fails and creation of new Phi is disabled.
4339bool MatchPhiSet(SimplificationTracker &ST,bool AllowNewPhiNodes,
4340unsigned &PhiNotMatchedCount) {
4341// Matched and PhiNodesToMatch iterate their elements in a deterministic
4342// order, so the replacements (ReplacePhi) are also done in a deterministic
4343// order.
4344SmallSetVector<PHIPair, 8> Matched;
4345SmallPtrSet<PHINode *, 8> WillNotMatch;
4346 PhiNodeSet &PhiNodesToMatch =ST.newPhiNodes();
4347while (PhiNodesToMatch.size()) {
4348PHINode *PHI = *PhiNodesToMatch.begin();
4349
      // Add ourselves; if no Phi node in the basic block matches, we do not
      // match.
4351 WillNotMatch.clear();
4352 WillNotMatch.insert(PHI);
4353
      // Traverse all Phis until we find an equivalent one or fail to do so.
4355bool IsMatched =false;
4356for (auto &P :PHI->getParent()->phis()) {
4357// Skip new Phi nodes.
4358if (PhiNodesToMatch.count(&P))
4359continue;
4360if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4361break;
        // If it does not match, collect all Phi nodes from the matcher.
        // If we end up with no match, then all these Phi nodes will not match
        // later.
4365for (auto M : Matched)
4366 WillNotMatch.insert(M.first);
4367 Matched.clear();
4368 }
4369if (IsMatched) {
4370// Replace all matched values and erase them.
4371for (auto MV : Matched)
4372ST.ReplacePhi(MV.first, MV.second);
4373 Matched.clear();
4374continue;
4375 }
4376// If we are not allowed to create new nodes then bail out.
4377if (!AllowNewPhiNodes)
4378returnfalse;
4379// Just remove all seen values in matcher. They will not match anything.
4380 PhiNotMatchedCount += WillNotMatch.size();
4381for (auto *P : WillNotMatch)
4382 PhiNodesToMatch.erase(P);
4383 }
4384returntrue;
4385 }
4386 /// Fill the placeholders with values from predecessors and simplify them.
4387void FillPlaceholders(FoldAddrToValueMapping &Map,
4388SmallVectorImpl<Value *> &TraverseOrder,
4389 SimplificationTracker &ST) {
4390while (!TraverseOrder.empty()) {
4391Value *Current = TraverseOrder.pop_back_val();
4392assert(Map.contains(Current) &&"No node to fill!!!");
4393Value *V =Map[Current];
4394
4395if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4396// CurrentValue also must be Select.
4397auto *CurrentSelect = cast<SelectInst>(Current);
4398auto *TrueValue = CurrentSelect->getTrueValue();
4399assert(Map.contains(TrueValue) &&"No True Value!");
4400Select->setTrueValue(ST.Get(Map[TrueValue]));
4401auto *FalseValue = CurrentSelect->getFalseValue();
4402assert(Map.contains(FalseValue) &&"No False Value!");
4403Select->setFalseValue(ST.Get(Map[FalseValue]));
4404 }else {
4405// Must be a Phi node then.
4406auto *PHI = cast<PHINode>(V);
4407// Fill the Phi node with values from predecessors.
4408for (auto *B :predecessors(PHI->getParent())) {
4409Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4410assert(Map.contains(PV) &&"No predecessor Value!");
4411PHI->addIncoming(ST.Get(Map[PV]),B);
4412 }
4413 }
4414Map[Current] =ST.Simplify(V);
4415 }
4416 }
4417
  /// Starting from the original value, recursively iterates over the def-use
  /// chain up to known ending values represented in a map. For each traversed
  /// phi/select it inserts a placeholder Phi or Select.
  /// Reports all newly created Phi/Select nodes by adding them to the set.
  /// Also reports the order in which the values have been traversed.
4423void InsertPlaceholders(FoldAddrToValueMapping &Map,
4424SmallVectorImpl<Value *> &TraverseOrder,
4425 SimplificationTracker &ST) {
4426SmallVector<Value *, 32> Worklist;
4427assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4428"Address must be a Phi or Select node");
4429auto *Dummy =PoisonValue::get(CommonType);
4430 Worklist.push_back(Original);
4431while (!Worklist.empty()) {
4432Value *Current = Worklist.pop_back_val();
4433// if it is already visited or it is an ending value then skip it.
4434if (Map.contains(Current))
4435continue;
4436 TraverseOrder.push_back(Current);
4437
4438// CurrentValue must be a Phi node or select. All others must be covered
4439// by anchors.
4440if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4441// Is it OK to get metadata from OrigSelect?!
4442// Create a Select placeholder with dummy value.
4443SelectInst *Select =
4444SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4445 CurrentSelect->getName(),
4446 CurrentSelect->getIterator(), CurrentSelect);
4447Map[Current] =Select;
4448ST.insertNewSelect(Select);
4449// We are interested in True and False values.
4450 Worklist.push_back(CurrentSelect->getTrueValue());
4451 Worklist.push_back(CurrentSelect->getFalseValue());
4452 }else {
4453// It must be a Phi node then.
4454PHINode *CurrentPhi = cast<PHINode>(Current);
4455unsigned PredCount = CurrentPhi->getNumIncomingValues();
4456PHINode *PHI =
4457PHINode::Create(CommonType, PredCount,"sunk_phi", CurrentPhi->getIterator());
4458Map[Current] =PHI;
4459ST.insertNewPhi(PHI);
4460append_range(Worklist, CurrentPhi->incoming_values());
4461 }
4462 }
4463 }
4464
4465bool addrModeCombiningAllowed() {
4466if (DisableComplexAddrModes)
4467returnfalse;
4468switch (DifferentField) {
4469default:
4470returnfalse;
4471case ExtAddrMode::BaseRegField:
4472returnAddrSinkCombineBaseReg;
4473case ExtAddrMode::BaseGVField:
4474returnAddrSinkCombineBaseGV;
4475case ExtAddrMode::BaseOffsField:
4476returnAddrSinkCombineBaseOffs;
4477case ExtAddrMode::ScaledRegField:
4478returnAddrSinkCombineScaledReg;
4479 }
4480 }
4481};
4482}// end anonymous namespace
4483
4484/// Try adding ScaleReg*Scale to the current addressing mode.
4485/// Return true and update AddrMode if this addr mode is legal for the target,
4486/// false if not.
4487bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4488unsignedDepth) {
4489// If Scale is 1, then this is the same as adding ScaleReg to the addressing
4490// mode. Just process that directly.
4491if (Scale == 1)
4492return matchAddr(ScaleReg,Depth);
4493
4494// If the scale is 0, it takes nothing to add this.
4495if (Scale == 0)
4496returntrue;
4497
4498// If we already have a scale of this value, we can add to it, otherwise, we
4499// need an available scale field.
4500if (AddrMode.Scale != 0 &&AddrMode.ScaledReg != ScaleReg)
4501returnfalse;
4502
4503ExtAddrMode TestAddrMode =AddrMode;
4504
4505// Add scale to turn X*4+X*3 -> X*7. This could also do things like
4506// [A+B + A*7] -> [B+A*8].
4507 TestAddrMode.Scale += Scale;
4508 TestAddrMode.ScaledReg = ScaleReg;
4509
4510// If the new address isn't legal, bail out.
4511if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4512returnfalse;
4513
4514// It was legal, so commit it.
4515AddrMode = TestAddrMode;
4516
4517// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4518// to see if ScaleReg is actually X+C. If so, we can turn this into adding
4519// X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4520// go any further: we can reuse it and cannot eliminate it.
4521ConstantInt *CI =nullptr;
4522Value *AddLHS =nullptr;
4523if (isa<Instruction>(ScaleReg) &&// not a constant expr.
4524match(ScaleReg,m_Add(m_Value(AddLHS),m_ConstantInt(CI))) &&
4525 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4526 TestAddrMode.InBounds =false;
4527 TestAddrMode.ScaledReg = AddLHS;
4528 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4529
4530// If this addressing mode is legal, commit it and remember that we folded
4531// this instruction.
4532if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4533 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4534AddrMode = TestAddrMode;
4535returntrue;
4536 }
4537// Restore status quo.
4538 TestAddrMode =AddrMode;
4539 }
4540
4541// If this is an add recurrence with a constant step, return the increment
4542// instruction and the canonicalized step.
4543auto GetConstantStep =
4544 [this](constValue *V) -> std::optional<std::pair<Instruction *, APInt>> {
4545auto *PN = dyn_cast<PHINode>(V);
4546if (!PN)
4547return std::nullopt;
4548auto IVInc =getIVIncrement(PN, &LI);
4549if (!IVInc)
4550return std::nullopt;
    // TODO: The result of the intrinsics above is two's complement. However,
    // when the IV increment is expressed as add or sub, iv.next is potentially
    // a poison value. If it has nuw or nsw flags, we need to make sure that
    // these flags are inferrable at the point of the memory instruction.
    // Otherwise we are replacing a well-defined two's-complement computation
    // with poison. Currently, to avoid the potentially complex analysis needed
    // to prove this, we reject such cases.
4557if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4558if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4559return std::nullopt;
4560if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4561return std::make_pair(IVInc->first, ConstantStep->getValue());
4562return std::nullopt;
4563 };
4564
4565// Try to account for the following special case:
4566// 1. ScaleReg is an inductive variable;
4567// 2. We use it with non-zero offset;
4568// 3. IV's increment is available at the point of memory instruction.
4569//
4570// In this case, we may reuse the IV increment instead of the IV Phi to
4571// achieve the following advantages:
4572// 1. If IV step matches the offset, we will have no need in the offset;
4573// 2. Even if they don't match, we will reduce the overlap of living IV
4574// and IV increment, that will potentially lead to better register
4575// assignment.
4576if (AddrMode.BaseOffs) {
4577if (auto IVStep = GetConstantStep(ScaleReg)) {
4578Instruction *IVInc = IVStep->first;
4579// The following assert is important to ensure a lack of infinite loops.
      // This transform is (intentionally) the inverse of the one just above.
4581// If they don't agree on the definition of an increment, we'd alternate
4582// back and forth indefinitely.
4583assert(isIVIncrement(IVInc, &LI) &&"implied by GetConstantStep");
4584APInt Step = IVStep->second;
4585APIntOffset = Step *AddrMode.Scale;
4586if (Offset.isSignedIntN(64)) {
4587 TestAddrMode.InBounds =false;
4588 TestAddrMode.ScaledReg = IVInc;
4589 TestAddrMode.BaseOffs -=Offset.getLimitedValue();
4590// If this addressing mode is legal, commit it..
4591// (Note that we defer the (expensive) domtree base legality check
4592// to the very last possible point.)
4593if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4594 getDTFn().dominates(IVInc, MemoryInst)) {
4595 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4596AddrMode = TestAddrMode;
4597returntrue;
4598 }
4599// Restore status quo.
4600 TestAddrMode =AddrMode;
4601 }
4602 }
4603 }
4604
4605// Otherwise, just return what we have.
4606returntrue;
4607}
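// For illustration (hypothetical IR, assuming the target accepts the mode):
// when matching %sum below as a ScaledReg with Scale == 4,
//   %sum = add i64 %x, 7
// the mode [4*%sum] is committed first; the add is then re-expressed so the
// constant moves into the offset, yielding [28 + 4*%x] with %sum recorded in
// AddrModeInsts.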
4608
4609/// This is a little filter, which returns true if an addressing computation
4610/// involving I might be folded into a load/store accessing it.
4611/// This doesn't need to be perfect, but needs to accept at least
4612/// the set of instructions that matchOperationAddr can.
4613staticboolMightBeFoldableInst(Instruction *I) {
4614switch (I->getOpcode()) {
4615case Instruction::BitCast:
4616case Instruction::AddrSpaceCast:
4617// Don't touch identity bitcasts.
4618if (I->getType() ==I->getOperand(0)->getType())
4619returnfalse;
4620returnI->getType()->isIntOrPtrTy();
4621case Instruction::PtrToInt:
4622// PtrToInt is always a noop, as we know that the int type is pointer sized.
4623returntrue;
4624case Instruction::IntToPtr:
4625// We know the input is intptr_t, so this is foldable.
4626returntrue;
4627case Instruction::Add:
4628returntrue;
4629case Instruction::Mul:
4630case Instruction::Shl:
4631// Can only handle X*C and X << C.
4632return isa<ConstantInt>(I->getOperand(1));
4633case Instruction::GetElementPtr:
4634returntrue;
4635default:
4636returnfalse;
4637 }
4638}
4639
4640/// Check whether or not \p Val is a legal instruction for \p TLI.
4641/// \note \p Val is assumed to be the product of some type promotion.
4642/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4643/// to be legal, as the non-promoted value would have had the same state.
4644staticboolisPromotedInstructionLegal(constTargetLowering &TLI,
4645constDataLayout &DL,Value *Val) {
4646Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4647if (!PromotedInst)
4648returnfalse;
4649int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4650// If the ISDOpcode is undefined, it was undefined before the promotion.
4651if (!ISDOpcode)
4652returntrue;
4653// Otherwise, check if the promoted instruction is legal or not.
4654return TLI.isOperationLegalOrCustom(
4655 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4656}
4657
4658namespace{
4659
4660/// Helper class to perform type promotion.
4661classTypePromotionHelper {
4662 /// Utility function to add a promoted instruction \p ExtOpnd to
4663 /// \p PromotedInsts and record the type of extension we have seen.
4664staticvoid addPromotedInst(InstrToOrigTy &PromotedInsts,
4665Instruction *ExtOpnd,bool IsSExt) {
4666 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4667 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4668if (It != PromotedInsts.end()) {
4669// If the new extension is same as original, the information in
4670// PromotedInsts[ExtOpnd] is still correct.
4671if (It->second.getInt() == ExtTy)
4672return;
4673
4674// Now the new extension is different from old extension, we make
4675// the type information invalid by setting extension type to
4676// BothExtension.
4677 ExtTy = BothExtension;
4678 }
4679 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4680 }
4681
4682 /// Utility function to query the original type of instruction \p Opnd
4683 /// with a matched extension type. If the extension doesn't match, we
4684 /// cannot use the information we had on the original type.
4685 /// BothExtension doesn't match any extension type.
4686staticconstType *getOrigType(const InstrToOrigTy &PromotedInsts,
4687Instruction *Opnd,bool IsSExt) {
4688 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4689 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4690if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4691return It->second.getPointer();
4692returnnullptr;
4693 }
4694
4695 /// Utility function to check whether or not a sign or zero extension
4696 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4697 /// either using the operands of \p Inst or promoting \p Inst.
4698 /// The type of the extension is defined by \p IsSExt.
4699 /// In other words, check if:
4700 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4701 /// #1 Promotion applies:
4702 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4703 /// #2 Operand reuses:
4704 /// ext opnd1 to ConsideredExtType.
4705 /// \p PromotedInsts maps the instructions to their type before promotion.
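 /// For example (illustrative): #1 applies to sext i16 (add nsw i16 %a, %b) to i32,
 /// which can become add nsw i32 (sext %a), (sext %b); #2 applies to
 /// sext i16 (trunc i32 (sext i8 %v to i32) to i16) to i32, which can simply
 /// reuse the existing sext i8 %v to i32.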
4706staticbool canGetThrough(constInstruction *Inst,Type *ConsideredExtType,
4707const InstrToOrigTy &PromotedInsts,bool IsSExt);
4708
4709 /// Utility function to determine if \p OpIdx should be promoted when
4710 /// promoting \p Inst.
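 /// E.g., for a select i1 %c, i8 %a, i8 %b being promoted, only %a and %b are
 /// widened; the i1 condition (operand 0) keeps its original type.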
4711staticbool shouldExtOperand(constInstruction *Inst,int OpIdx) {
4712return !(isa<SelectInst>(Inst) && OpIdx == 0);
4713 }
4714
4715 /// Utility function to promote the operand of \p Ext when this
4716 /// operand is a promotable trunc or sext or zext.
4717 /// \p PromotedInsts maps the instructions to their type before promotion.
4718 /// \p CreatedInstsCost[out] contains the cost of all instructions
4719 /// created to promote the operand of Ext.
4720 /// Newly added extensions are inserted in \p Exts.
4721 /// Newly added truncates are inserted in \p Truncs.
4722 /// Should never be called directly.
4723 /// \return The promoted value which is used instead of Ext.
4724staticValue *promoteOperandForTruncAndAnyExt(
4725Instruction *Ext, TypePromotionTransaction &TPT,
4726 InstrToOrigTy &PromotedInsts,unsigned &CreatedInstsCost,
4727SmallVectorImpl<Instruction *> *Exts,
4728SmallVectorImpl<Instruction *> *Truncs,constTargetLowering &TLI);
4729
4730 /// Utility function to promote the operand of \p Ext when this
4731 /// operand is promotable and is not a supported trunc or sext.
4732 /// \p PromotedInsts maps the instructions to their type before promotion.
4733 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4734 /// created to promote the operand of Ext.
4735 /// Newly added extensions are inserted in \p Exts.
4736 /// Newly added truncates are inserted in \p Truncs.
4737 /// Should never be called directly.
4738 /// \return The promoted value which is used instead of Ext.
4739staticValue *promoteOperandForOther(Instruction *Ext,
4740 TypePromotionTransaction &TPT,
4741 InstrToOrigTy &PromotedInsts,
4742unsigned &CreatedInstsCost,
4743SmallVectorImpl<Instruction *> *Exts,
4744SmallVectorImpl<Instruction *> *Truncs,
4745constTargetLowering &TLI,bool IsSExt);
4746
4747 /// \see promoteOperandForOther.
4748staticValue *signExtendOperandForOther(
4749Instruction *Ext, TypePromotionTransaction &TPT,
4750 InstrToOrigTy &PromotedInsts,unsigned &CreatedInstsCost,
4751SmallVectorImpl<Instruction *> *Exts,
4752SmallVectorImpl<Instruction *> *Truncs,constTargetLowering &TLI) {
4753return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4754 Exts, Truncs, TLI,true);
4755 }
4756
4757 /// \see promoteOperandForOther.
4758staticValue *zeroExtendOperandForOther(
4759Instruction *Ext, TypePromotionTransaction &TPT,
4760 InstrToOrigTy &PromotedInsts,unsigned &CreatedInstsCost,
4761SmallVectorImpl<Instruction *> *Exts,
4762SmallVectorImpl<Instruction *> *Truncs,constTargetLowering &TLI) {
4763return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4764 Exts, Truncs, TLI,false);
4765 }
4766
4767public:
4768 /// Type for the utility function that promotes the operand of Ext.
4769usingAction =Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4770 InstrToOrigTy &PromotedInsts,
4771unsigned &CreatedInstsCost,
4772SmallVectorImpl<Instruction *> *Exts,
4773SmallVectorImpl<Instruction *> *Truncs,
4774constTargetLowering &TLI);
4775
4776 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4777 /// action to promote the operand of \p Ext instead of using Ext.
4778 /// \return NULL if no promotable action is possible with the current
4779 /// sign extension.
4780 /// \p InsertedInsts keeps track of all the instructions inserted by the
4781 /// other CodeGenPrepare optimizations. This information is important
4782 /// because we do not want to promote these instructions as CodeGenPrepare
4783 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4784 /// \p PromotedInsts maps the instructions to their type before promotion.
4785static Action getAction(Instruction *Ext,const SetOfInstrs &InsertedInsts,
4786constTargetLowering &TLI,
4787const InstrToOrigTy &PromotedInsts);
4788};
4789
4790}// end anonymous namespace
4791
4792bool TypePromotionHelper::canGetThrough(constInstruction *Inst,
4793Type *ConsideredExtType,
4794const InstrToOrigTy &PromotedInsts,
4795bool IsSExt) {
4796// The promotion helper does not know how to deal with vector types yet.
4797// To be able to fix that, we would need to fix the places where we
4798// statically extend, e.g., constants and such.
4799if (Inst->getType()->isVectorTy())
4800returnfalse;
4801
4802// We can always get through zext.
4803if (isa<ZExtInst>(Inst))
4804returntrue;
4805
4806// sext(sext) is ok too.
4807if (IsSExt && isa<SExtInst>(Inst))
4808returntrue;
4809
4810// We can get through a binary operator if it is legal. In other words, the
4811// binary operator must have a nuw or nsw flag.
4812if (constauto *BinOp = dyn_cast<BinaryOperator>(Inst))
4813if (isa<OverflowingBinaryOperator>(BinOp) &&
4814 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4815 (IsSExt && BinOp->hasNoSignedWrap())))
4816returntrue;
4817
4818// ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4819if ((Inst->getOpcode() == Instruction::And ||
4820 Inst->getOpcode() == Instruction::Or))
4821returntrue;
4822
4823// ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4824if (Inst->getOpcode() == Instruction::Xor) {
4825// Make sure it is not a NOT.
4826if (constauto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4827if (!Cst->getValue().isAllOnes())
4828returntrue;
4829 }
4830
4831// zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4832// It may change a poisoned value into a regular value, like
4833// zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4834// (poisoned value --> regular value)
4835// It should be OK since undef covers valid value.
4836if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4837returntrue;
4838
4839// and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4840// It may change a poisoned value into a regular value, like
4841// zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4842// (poisoned value --> regular value)
4843// It should be OK since undef covers valid value.
4844if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4845constauto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4846if (ExtInst->hasOneUse()) {
4847constauto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4848if (AndInst && AndInst->getOpcode() == Instruction::And) {
4849constauto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4850if (Cst &&
4851 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4852returntrue;
4853 }
4854 }
4855 }
4856
4857// Check if we can do the following simplification.
4858// ext(trunc(opnd)) --> ext(opnd)
4859if (!isa<TruncInst>(Inst))
4860returnfalse;
4861
4862Value *OpndVal = Inst->getOperand(0);
4863// Check if we can use this operand in the extension.
4864// If the type is larger than the result type of the extension, we cannot.
4865if (!OpndVal->getType()->isIntegerTy() ||
4866 OpndVal->getType()->getIntegerBitWidth() >
4867 ConsideredExtType->getIntegerBitWidth())
4868returnfalse;
4869
4870// If the operand of the truncate is not an instruction, we will not have
4871// any information on the dropped bits.
4872// (Actually we could for constant but it is not worth the extra logic).
4873Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4874if (!Opnd)
4875returnfalse;
4876
4877// Check if the source of the truncate is narrow enough.
4878// I.e., check that the trunc just drops extended bits of the same kind as
4879// the extension.
4880// #1 get the type of the operand and check the kind of the extended bits.
4881constType *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4882if (OpndType)
4883 ;
4884elseif ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4885 OpndType = Opnd->getOperand(0)->getType();
4886else
4887returnfalse;
4888
4889// #2 check that the truncate just drops extended bits.
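// E.g., (illustrative) zext i16 (trunc i32 (zext i8 %v to i32) to i16) to i32:
// the trunc only drops bits that are known to be zero, so the outer zext can
// look through it.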
4890return Inst->getType()->getIntegerBitWidth() >=
4891 OpndType->getIntegerBitWidth();
4892}
4893
4894TypePromotionHelper::Action TypePromotionHelper::getAction(
4895Instruction *Ext,const SetOfInstrs &InsertedInsts,
4896constTargetLowering &TLI,const InstrToOrigTy &PromotedInsts) {
4897assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4898"Unexpected instruction type");
4899Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4900Type *ExtTy =Ext->getType();
4901bool IsSExt = isa<SExtInst>(Ext);
4902// If the operand of the extension is not an instruction, we cannot
4903// get through.
4904// If it is, check whether we can get through.
4905if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4906returnnullptr;
4907
4908// Do not promote if the operand has been added by codegenprepare.
4909// Otherwise, it means we are undoing an optimization that is likely to be
4910// redone, thus causing potential infinite loop.
4911if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4912returnnullptr;
4913
4914// SExt, ZExt or Trunc instructions.
4915// Return the related handler.
4916if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4917 isa<ZExtInst>(ExtOpnd))
4918return promoteOperandForTruncAndAnyExt;
4919
4920// Regular instruction.
4921// Abort early if we will have to insert non-free instructions.
4922if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4923returnnullptr;
4924return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4925}
4926
4927Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4928Instruction *SExt, TypePromotionTransaction &TPT,
4929 InstrToOrigTy &PromotedInsts,unsigned &CreatedInstsCost,
4930SmallVectorImpl<Instruction *> *Exts,
4931SmallVectorImpl<Instruction *> *Truncs,constTargetLowering &TLI) {
4932// By construction, the operand of SExt is an instruction. Otherwise we cannot
4933// get through it and this method should not be called.
4934Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4935Value *ExtVal = SExt;
4936bool HasMergedNonFreeExt =false;
4937if (isa<ZExtInst>(SExtOpnd)) {
4938// Replace s|zext(zext(opnd))
4939// => zext(opnd).
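// E.g., (illustrative) sext i16 (zext i8 %x to i16) to i32 becomes
// zext i8 %x to i32.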
4940 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4941Value *ZExt =
4942 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4943 TPT.replaceAllUsesWith(SExt, ZExt);
4944 TPT.eraseInstruction(SExt);
4945 ExtVal = ZExt;
4946 }else {
4947// Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4948// => z|sext(opnd).
4949 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4950 }
4951 CreatedInstsCost = 0;
4952
4953// Remove dead code.
4954if (SExtOpnd->use_empty())
4955 TPT.eraseInstruction(SExtOpnd);
4956
4957// Check if the extension is still needed.
4958Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4959if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4960if (ExtInst) {
4961if (Exts)
4962 Exts->push_back(ExtInst);
4963 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4964 }
4965return ExtVal;
4966 }
4967
4968// At this point we have: ext ty opnd to ty.
4969// Reassign the uses of ExtInst to the opnd and remove ExtInst.
4970Value *NextVal = ExtInst->getOperand(0);
4971 TPT.eraseInstruction(ExtInst, NextVal);
4972return NextVal;
4973}
4974
4975Value *TypePromotionHelper::promoteOperandForOther(
4976Instruction *Ext, TypePromotionTransaction &TPT,
4977 InstrToOrigTy &PromotedInsts,unsigned &CreatedInstsCost,
4978SmallVectorImpl<Instruction *> *Exts,
4979SmallVectorImpl<Instruction *> *Truncs,constTargetLowering &TLI,
4980bool IsSExt) {
4981// By construction, the operand of Ext is an instruction. Otherwise we cannot
4982// get through it and this method should not be called.
4983Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4984 CreatedInstsCost = 0;
4985if (!ExtOpnd->hasOneUse()) {
4986// ExtOpnd will be promoted.
4987// All its uses, but Ext, will need to use a truncated value of the
4988// promoted version.
4989// Create the truncate now.
4990Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4991if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4992// Insert it just after the definition.
4993 ITrunc->moveAfter(ExtOpnd);
4994if (Truncs)
4995 Truncs->push_back(ITrunc);
4996 }
4997
4998 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4999// Restore the operand of Ext (which has been replaced by the previous call
5000// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5001 TPT.setOperand(Ext, 0, ExtOpnd);
5002 }
5003
5004// Get through the Instruction:
5005// 1. Update its type.
5006// 2. Replace the uses of Ext by Inst.
5007// 3. Extend each operand that needs to be extended.
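// E.g., (illustrative) promoting %e = sext i32 %a to i64 where %a = add nsw i32 %x, 1
// mutates %a to i64, redirects the uses of %e to %a, and rewrites the operands as
// sext i32 %x to i64 and the constant 1 widened to i64.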
5008
5009// Remember the original type of the instruction before promotion.
5010// This is useful to know that the high bits are sign extended bits.
5011 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5012// Step #1.
5013 TPT.mutateType(ExtOpnd,Ext->getType());
5014// Step #2.
5015 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5016// Step #3.
5017LLVM_DEBUG(dbgs() <<"Propagate Ext to operands\n");
5018for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5019 ++OpIdx) {
5020LLVM_DEBUG(dbgs() <<"Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) <<'\n');
5021if (ExtOpnd->getOperand(OpIdx)->getType() ==Ext->getType() ||
5022 !shouldExtOperand(ExtOpnd, OpIdx)) {
5023LLVM_DEBUG(dbgs() <<"No need to propagate\n");
5024continue;
5025 }
5026// Check if we can statically extend the operand.
5027Value *Opnd = ExtOpnd->getOperand(OpIdx);
5028if (constConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5029LLVM_DEBUG(dbgs() <<"Statically extend\n");
5030unsignedBitWidth =Ext->getType()->getIntegerBitWidth();
5031APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5032 : Cst->getValue().zext(BitWidth);
5033 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5034continue;
5035 }
5036// UndefValues are typed, so we have to statically extend them.
5037if (isa<UndefValue>(Opnd)) {
5038LLVM_DEBUG(dbgs() <<"Statically extend\n");
5039 TPT.setOperand(ExtOpnd, OpIdx,UndefValue::get(Ext->getType()));
5040continue;
5041 }
5042
5043// Otherwise we have to explicitly sign or zero extend the operand.
5044Value *ValForExtOpnd = IsSExt
5045 ? TPT.createSExt(ExtOpnd, Opnd,Ext->getType())
5046 : TPT.createZExt(ExtOpnd, Opnd,Ext->getType());
5047 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5048Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5049if (!InstForExtOpnd)
5050continue;
5051
5052if (Exts)
5053 Exts->push_back(InstForExtOpnd);
5054
5055 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5056 }
5057LLVM_DEBUG(dbgs() <<"Extension is useless now\n");
5058 TPT.eraseInstruction(Ext);
5059return ExtOpnd;
5060}
5061
5062/// Check whether or not promoting an instruction to a wider type is profitable.
5063/// \p NewCost gives the cost of extension instructions created by the
5064/// promotion.
5065/// \p OldCost gives the cost of extension instructions before the promotion
5066/// plus the number of instructions that have been
5067/// matched in the addressing mode thanks to the promotion.
5068/// \p PromotedOperand is the value that has been promoted.
5069/// \return True if the promotion is profitable, false otherwise.
5070bool AddressingModeMatcher::isPromotionProfitable(
5071unsigned NewCost,unsigned OldCost,Value *PromotedOperand) const{
5072LLVM_DEBUG(dbgs() <<"OldCost: " << OldCost <<"\tNewCost: " << NewCost
5073 <<'\n');
5074// The cost of the new extensions is greater than the cost of the
5075// old extension plus what we folded.
5076// This is not profitable.
5077if (NewCost > OldCost)
5078returnfalse;
5079if (NewCost < OldCost)
5080returntrue;
5081// The promotion is neutral but it may help folding the sign extension in
5082// loads for instance.
5083// Check that we did not create an illegal instruction.
5084returnisPromotedInstructionLegal(TLI,DL, PromotedOperand);
5085}
5086
5087/// Given an instruction or constant expr, see if we can fold the operation
5088/// into the addressing mode. If so, update the addressing mode and return
5089/// true, otherwise return false without modifying AddrMode.
5090/// If \p MovedAway is not NULL, it contains the information of whether or
5091/// not AddrInst has to be folded into the addressing mode on success.
5092/// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
5093/// because it has been moved away.
5094/// Thus AddrInst must not be added to the matched instructions.
5095/// This state can happen when AddrInst is a sext, since it may be moved away.
5096/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5097/// not be referenced anymore.
5098bool AddressingModeMatcher::matchOperationAddr(User *AddrInst,unsigned Opcode,
5099unsignedDepth,
5100bool *MovedAway) {
5101// Avoid exponential behavior on extremely deep expression trees.
5102if (Depth >= 5)
5103returnfalse;
5104
5105// By default, all matched instructions stay in place.
5106if (MovedAway)
5107 *MovedAway =false;
5108
5109switch (Opcode) {
5110case Instruction::PtrToInt:
5111// PtrToInt is always a noop, as we know that the int type is pointer sized.
5112return matchAddr(AddrInst->getOperand(0),Depth);
5113case Instruction::IntToPtr: {
5114auto AS = AddrInst->getType()->getPointerAddressSpace();
5115auto PtrTy =MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5116// This inttoptr is a no-op if the integer type is pointer sized.
5117if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5118return matchAddr(AddrInst->getOperand(0),Depth);
5119returnfalse;
5120 }
5121case Instruction::BitCast:
5122// BitCast is always a noop, and we can handle it as long as it is
5123// int->int or pointer->pointer (we don't want int<->fp or something).
5124if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5125// Don't touch identity bitcasts. These were probably put here by LSR,
5126// and we don't want to mess around with them. Assume it knows what it
5127// is doing.
5128 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5129return matchAddr(AddrInst->getOperand(0),Depth);
5130returnfalse;
5131case Instruction::AddrSpaceCast: {
5132unsigned SrcAS =
5133 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5134unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5135if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5136return matchAddr(AddrInst->getOperand(0),Depth);
5137returnfalse;
5138 }
5139case Instruction::Add: {
5140// Check to see if we can merge in one operand, then the other. If so, we
5141// win.
5142ExtAddrMode BackupAddrMode =AddrMode;
5143unsigned OldSize = AddrModeInsts.size();
5144// Start a transaction at this point.
5145// The LHS may match but not the RHS.
5146// Therefore, we need a higher level restoration point to undo partially
5147// matched operation.
5148 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5149 TPT.getRestorationPoint();
5150
5151// Try to match an integer constant second to increase its chance of ending
5152// up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
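// E.g., (illustrative) for add i64 %p, 16, %p is matched first so that the
// constant 16 can land in BaseOffs rather than occupying BaseReg.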
5153intFirst = 0, Second = 1;
5154if (isa<ConstantInt>(AddrInst->getOperand(First))
5155 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5156std::swap(First, Second);
5157AddrMode.InBounds =false;
5158if (matchAddr(AddrInst->getOperand(First),Depth + 1) &&
5159 matchAddr(AddrInst->getOperand(Second),Depth + 1))
5160returntrue;
5161
5162// Restore the old addr mode info.
5163AddrMode = BackupAddrMode;
5164 AddrModeInsts.resize(OldSize);
5165 TPT.rollback(LastKnownGood);
5166
5167// Otherwise this was over-aggressive. Try merging operands in the opposite
5168// order.
5169if (matchAddr(AddrInst->getOperand(Second),Depth + 1) &&
5170 matchAddr(AddrInst->getOperand(First),Depth + 1))
5171returntrue;
5172
5173// Otherwise we definitely can't merge the ADD in.
5174AddrMode = BackupAddrMode;
5175 AddrModeInsts.resize(OldSize);
5176 TPT.rollback(LastKnownGood);
5177break;
5178 }
5179// case Instruction::Or:
5180// TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5181// break;
5182case Instruction::Mul:
5183case Instruction::Shl: {
5184// Can only handle X*C and X << C.
5185AddrMode.InBounds =false;
5186ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5187if (!RHS ||RHS->getBitWidth() > 64)
5188returnfalse;
5189 int64_t Scale = Opcode == Instruction::Shl
5190 ? 1LL <<RHS->getLimitedValue(RHS->getBitWidth() - 1)
5191 :RHS->getSExtValue();
5192
5193return matchScaledValue(AddrInst->getOperand(0), Scale,Depth);
5194 }
5195case Instruction::GetElementPtr: {
5196// Scan the GEP. We check whether it contains constant offsets and at most
5197// one variable offset.
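// E.g., (illustrative) getelementptr [10 x i32], ptr %p, i64 0, i64 %i has a
// constant offset of 0 and a single variable index %i with scale 4 (the i32
// element stride).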
5198int VariableOperand = -1;
5199unsigned VariableScale = 0;
5200
5201 int64_t ConstantOffset = 0;
5202gep_type_iterator GTI =gep_type_begin(AddrInst);
5203for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5204if (StructType *STy = GTI.getStructTypeOrNull()) {
5205constStructLayout *SL =DL.getStructLayout(STy);
5206unsignedIdx =
5207 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5208 ConstantOffset += SL->getElementOffset(Idx);
5209 }else {
5210TypeSize TS = GTI.getSequentialElementStride(DL);
5211if (TS.isNonZero()) {
5212// The optimisations below currently only work for fixed offsets.
5213if (TS.isScalable())
5214returnfalse;
5215 int64_tTypeSize = TS.getFixedValue();
5216if (ConstantInt *CI =
5217 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5218constAPInt &CVal = CI->getValue();
5219if (CVal.getSignificantBits() <= 64) {
5220 ConstantOffset += CVal.getSExtValue() *TypeSize;
5221continue;
5222 }
5223 }
5224// We only allow one variable index at the moment.
5225if (VariableOperand != -1)
5226returnfalse;
5227
5228// Remember the variable index.
5229 VariableOperand = i;
5230 VariableScale =TypeSize;
5231 }
5232 }
5233 }
5234
5235// A common case is for the GEP to only do a constant offset. In this case,
5236// just add it to the disp field and check validity.
5237if (VariableOperand == -1) {
5238AddrMode.BaseOffs += ConstantOffset;
5239if (matchAddr(AddrInst->getOperand(0),Depth + 1)) {
5240if (!cast<GEPOperator>(AddrInst)->isInBounds())
5241AddrMode.InBounds =false;
5242returntrue;
5243 }
5244AddrMode.BaseOffs -= ConstantOffset;
5245
5246if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5247 TLI.shouldConsiderGEPOffsetSplit() &&Depth == 0 &&
5248 ConstantOffset > 0) {
5249// Record GEPs with non-zero offsets as candidates for splitting in
5250// the event that the offset cannot fit into the r+i addressing mode.
5251// Simple and common case that only one GEP is used in calculating the
5252// address for the memory access.
5253Value *Base = AddrInst->getOperand(0);
5254auto *BaseI = dyn_cast<Instruction>(Base);
5255auto *GEP = cast<GetElementPtrInst>(AddrInst);
5256if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5257 (BaseI && !isa<CastInst>(BaseI) &&
5258 !isa<GetElementPtrInst>(BaseI))) {
5259// Make sure the parent block allows inserting non-PHI instructions
5260// before the terminator.
5261BasicBlock *Parent = BaseI ? BaseI->getParent()
5262 : &GEP->getFunction()->getEntryBlock();
5263if (!Parent->getTerminator()->isEHPad())
5264 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5265 }
5266 }
5267
5268returnfalse;
5269 }
5270
5271// Save the valid addressing mode in case we can't match.
5272ExtAddrMode BackupAddrMode =AddrMode;
5273unsigned OldSize = AddrModeInsts.size();
5274
5275// See if the scale and offset amount is valid for this target.
5276AddrMode.BaseOffs += ConstantOffset;
5277if (!cast<GEPOperator>(AddrInst)->isInBounds())
5278AddrMode.InBounds =false;
5279
5280// Match the base operand of the GEP.
5281if (!matchAddr(AddrInst->getOperand(0),Depth + 1)) {
5282// If it couldn't be matched, just stuff the value in a register.
5283if (AddrMode.HasBaseReg) {
5284AddrMode = BackupAddrMode;
5285 AddrModeInsts.resize(OldSize);
5286returnfalse;
5287 }
5288AddrMode.HasBaseReg =true;
5289AddrMode.BaseReg = AddrInst->getOperand(0);
5290 }
5291
5292// Match the remaining variable portion of the GEP.
5293if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5294Depth)) {
5295// If it couldn't be matched, try stuffing the base into a register
5296// instead of matching it, and retrying the match of the scale.
5297AddrMode = BackupAddrMode;
5298 AddrModeInsts.resize(OldSize);
5299if (AddrMode.HasBaseReg)
5300returnfalse;
5301AddrMode.HasBaseReg =true;
5302AddrMode.BaseReg = AddrInst->getOperand(0);
5303AddrMode.BaseOffs += ConstantOffset;
5304if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5305 VariableScale,Depth)) {
5306// If even that didn't work, bail.
5307AddrMode = BackupAddrMode;
5308 AddrModeInsts.resize(OldSize);
5309returnfalse;
5310 }
5311 }
5312
5313returntrue;
5314 }
5315case Instruction::SExt:
5316case Instruction::ZExt: {
5317Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5318if (!Ext)
5319returnfalse;
5320
5321// Try to move this ext out of the way of the addressing mode.
5322// Ask for a method for doing so.
5323 TypePromotionHelper::Action TPH =
5324 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5325if (!TPH)
5326returnfalse;
5327
5328 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5329 TPT.getRestorationPoint();
5330unsigned CreatedInstsCost = 0;
5331unsigned ExtCost = !TLI.isExtFree(Ext);
5332Value *PromotedOperand =
5333 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost,nullptr,nullptr, TLI);
5334// SExt has been moved away.
5335// Thus either it will be rematched later in the recursive calls or it is
5336// gone. Anyway, we must not fold it into the addressing mode at this point.
5337// E.g.,
5338// op = add opnd, 1
5339// idx = ext op
5340// addr = gep base, idx
5341// is now:
5342// promotedOpnd = ext opnd <- no match here
5343// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5344// addr = gep base, op <- match
5345if (MovedAway)
5346 *MovedAway =true;
5347
5348assert(PromotedOperand &&
5349"TypePromotionHelper should have filtered out those cases");
5350
5351ExtAddrMode BackupAddrMode =AddrMode;
5352unsigned OldSize = AddrModeInsts.size();
5353
5354if (!matchAddr(PromotedOperand,Depth) ||
5355// The total of the new cost is equal to the cost of the created
5356// instructions.
5357// The total of the old cost is equal to the cost of the extension plus
5358// what we have saved in the addressing mode.
5359 !isPromotionProfitable(CreatedInstsCost,
5360 ExtCost + (AddrModeInsts.size() - OldSize),
5361 PromotedOperand)) {
5362AddrMode = BackupAddrMode;
5363 AddrModeInsts.resize(OldSize);
5364LLVM_DEBUG(dbgs() <<"Sign extension does not pay off: rollback\n");
5365 TPT.rollback(LastKnownGood);
5366returnfalse;
5367 }
5368returntrue;
5369 }
5370case Instruction::Call:
5371if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5372if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5373GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5374if (TLI.addressingModeSupportsTLS(GV))
5375return matchAddr(AddrInst->getOperand(0),Depth);
5376 }
5377 }
5378break;
5379 }
5380returnfalse;
5381}
5382
5383/// If we can, try to add the value of 'Addr' into the current addressing mode.
5384/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5385/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5386/// for the target.
5387///
5388bool AddressingModeMatcher::matchAddr(Value *Addr,unsignedDepth) {
5389// Start a transaction at this point that we will rollback if the matching
5390// fails.
5391 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5392 TPT.getRestorationPoint();
5393if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5394if (CI->getValue().isSignedIntN(64)) {
5395// Fold in immediates if legal for the target.
5396AddrMode.BaseOffs += CI->getSExtValue();
5397if (TLI.isLegalAddressingMode(DL,AddrMode, AccessTy, AddrSpace))
5398returntrue;
5399AddrMode.BaseOffs -= CI->getSExtValue();
5400 }
5401 }elseif (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5402// If this is a global variable, try to fold it into the addressing mode.
5403if (!AddrMode.BaseGV) {
5404AddrMode.BaseGV = GV;
5405if (TLI.isLegalAddressingMode(DL,AddrMode, AccessTy, AddrSpace))
5406returntrue;
5407AddrMode.BaseGV =nullptr;
5408 }
5409 }elseif (Instruction *I = dyn_cast<Instruction>(Addr)) {
5410ExtAddrMode BackupAddrMode =AddrMode;
5411unsigned OldSize = AddrModeInsts.size();
5412
5413// Check to see if it is possible to fold this operation.
5414bool MovedAway =false;
5415if (matchOperationAddr(I,I->getOpcode(),Depth, &MovedAway)) {
5416// This instruction may have been moved away. If so, there is nothing
5417// to check here.
5418if (MovedAway)
5419returntrue;
5420// Okay, it's possible to fold this. Check to see if it is actually
5421// *profitable* to do so. We use a simple cost model to avoid increasing
5422// register pressure too much.
5423if (I->hasOneUse() ||
5424 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode,AddrMode)) {
5425 AddrModeInsts.push_back(I);
5426returntrue;
5427 }
5428
5429// It isn't profitable to do this, roll back.
5430AddrMode = BackupAddrMode;
5431 AddrModeInsts.resize(OldSize);
5432 TPT.rollback(LastKnownGood);
5433 }
5434 }elseif (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5435if (matchOperationAddr(CE,CE->getOpcode(),Depth))
5436returntrue;
5437 TPT.rollback(LastKnownGood);
5438 }elseif (isa<ConstantPointerNull>(Addr)) {
5439// Null pointer gets folded without affecting the addressing mode.
5440returntrue;
5441 }
5442
5443// Worst case, the target should support [reg] addressing modes. :)
5444if (!AddrMode.HasBaseReg) {
5445AddrMode.HasBaseReg =true;
5446AddrMode.BaseReg =Addr;
5447// Still check for legality in case the target supports [imm] but not [i+r].
5448if (TLI.isLegalAddressingMode(DL,AddrMode, AccessTy, AddrSpace))
5449returntrue;
5450AddrMode.HasBaseReg =false;
5451AddrMode.BaseReg =nullptr;
5452 }
5453
5454// If the base register is already taken, see if we can do [r+r].
5455if (AddrMode.Scale == 0) {
5456AddrMode.Scale = 1;
5457AddrMode.ScaledReg =Addr;
5458if (TLI.isLegalAddressingMode(DL,AddrMode, AccessTy, AddrSpace))
5459returntrue;
5460AddrMode.Scale = 0;
5461AddrMode.ScaledReg =nullptr;
5462 }
5463// Couldn't match.
5464 TPT.rollback(LastKnownGood);
5465returnfalse;
5466}
5467
5468/// Check to see if all uses of OpVal by the specified inline asm call are due
5469/// to memory operands. If so, return true, otherwise return false.
5470staticboolIsOperandAMemoryOperand(CallInst *CI,InlineAsm *IA,Value *OpVal,
5471constTargetLowering &TLI,
5472constTargetRegisterInfo &TRI) {
5473constFunction *F = CI->getFunction();
5474TargetLowering::AsmOperandInfoVector TargetConstraints =
5475 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5476
5477for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5478// Compute the constraint code and ConstraintType to use.
5479 TLI.ComputeConstraintToUse(OpInfo,SDValue());
5480
5481// If this asm operand is our Value*, and if it isn't an indirect memory
5482// operand, we can't fold it! TODO: Also handle C_Address?
5483if (OpInfo.CallOperandVal == OpVal &&
5484 (OpInfo.ConstraintType !=TargetLowering::C_Memory ||
5485 !OpInfo.isIndirect))
5486returnfalse;
5487 }
5488
5489returntrue;
5490}
5491
5492/// Recursively walk all the uses of I until we find a memory use.
5493/// If we find an obviously non-foldable instruction, return true.
5494/// Add accessed addresses and types to MemoryUses.
5495staticboolFindAllMemoryUses(
5496Instruction *I,SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5497SmallPtrSetImpl<Instruction *> &ConsideredInsts,constTargetLowering &TLI,
5498constTargetRegisterInfo &TRI,bool OptSize,ProfileSummaryInfo *PSI,
5499BlockFrequencyInfo *BFI,unsigned &SeenInsts) {
5500// If we already considered this instruction, we're done.
5501if (!ConsideredInsts.insert(I).second)
5502returnfalse;
5503
5504// If this is an obviously unfoldable instruction, bail out.
5505if (!MightBeFoldableInst(I))
5506returntrue;
5507
5508// Loop over all the uses, recursively processing them.
5509for (Use &U :I->uses()) {
5510// Conservatively return true if we're seeing a large number or a deep chain
5511// of users. This avoids excessive compilation times in pathological cases.
5512if (SeenInsts++ >=MaxAddressUsersToScan)
5513returntrue;
5514
5515Instruction *UserI = cast<Instruction>(U.getUser());
5516if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5517 MemoryUses.push_back({&U, LI->getType()});
5518continue;
5519 }
5520
5521if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5522if (U.getOperandNo() !=StoreInst::getPointerOperandIndex())
5523returntrue;// Storing addr, not into addr.
5524 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5525continue;
5526 }
5527
5528if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5529if (U.getOperandNo() !=AtomicRMWInst::getPointerOperandIndex())
5530returntrue;// Storing addr, not into addr.
5531 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5532continue;
5533 }
5534
5535if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5536if (U.getOperandNo() !=AtomicCmpXchgInst::getPointerOperandIndex())
5537returntrue;// Storing addr, not into addr.
5538 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5539continue;
5540 }
5541
5542if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5543if (CI->hasFnAttr(Attribute::Cold)) {
5544// If this is a cold call, we can sink the addressing calculation into
5545// the cold path. See optimizeCallInst
5546if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5547continue;
5548 }
5549
5550InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5551if (!IA)
5552returntrue;
5553
5554// If this is a memory operand, we're cool, otherwise bail out.
5555if (!IsOperandAMemoryOperand(CI, IA,I, TLI,TRI))
5556returntrue;
5557continue;
5558 }
5559
5560if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI,TRI, OptSize,
5561 PSI, BFI, SeenInsts))
5562returntrue;
5563 }
5564
5565returnfalse;
5566}
5567
5568staticboolFindAllMemoryUses(
5569Instruction *I,SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5570constTargetLowering &TLI,constTargetRegisterInfo &TRI,bool OptSize,
5571ProfileSummaryInfo *PSI,BlockFrequencyInfo *BFI) {
5572unsigned SeenInsts = 0;
5573SmallPtrSet<Instruction *, 16> ConsideredInsts;
5574returnFindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI,TRI, OptSize,
5575 PSI, BFI, SeenInsts);
5576}
5577
5578
5579/// Return true if Val is already known to be live at the use site that we're
5580/// folding it into. If so, there is no cost to include it in the addressing
5581/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5582/// instruction already.
5583bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5584Value *KnownLive1,
5585Value *KnownLive2) {
5586// If Val is either of the known-live values, we know it is live!
5587if (Val ==nullptr || Val == KnownLive1 || Val == KnownLive2)
5588returntrue;
5589
5590// All values other than instructions and arguments (e.g. constants) are live.
5591if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5592returntrue;
5593
5594// If Val is a constant-sized alloca in the entry block, it is live; this is
5595// true because it is just a reference to the stack/frame pointer, which is
5596// live for the whole function.
5597if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5598if (AI->isStaticAlloca())
5599returntrue;
5600
5601// Check to see if this value is already used in the memory instruction's
5602// block. If so, it's already live into the block at the very least, so we
5603// can reasonably fold it.
5604return Val->isUsedInBasicBlock(MemoryInst->getParent());
5605}
5606
5607/// It is possible for the addressing mode of the machine to fold the specified
5608/// instruction into a load or store that ultimately uses it.
5609/// However, the specified instruction has multiple uses.
5610/// Given this, it may actually increase register pressure to fold it
5611/// into the load. For example, consider this code:
5612///
5613/// X = ...
5614/// Y = X+1
5615/// use(Y) -> nonload/store
5616/// Z = Y+1
5617/// load Z
5618///
5619/// In this case, Y has multiple uses, and can be folded into the load of Z
5620/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5621/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5622/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5623/// number of computations either.
5624///
5625/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5626/// X was live across 'load Z' for other reasons, we actually *would* want to
5627/// fold the addressing mode in the Z case. This would make Y die earlier.
5628bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5629Instruction *I,ExtAddrMode &AMBefore,ExtAddrMode &AMAfter) {
5630if (IgnoreProfitability)
5631returntrue;
5632
5633// AMBefore is the addressing mode before this instruction was folded into it,
5634// and AMAfter is the addressing mode after the instruction was folded. Get
5635// the set of registers referenced by AMAfter and subtract out those
5636// referenced by AMBefore: this is the set of values which folding in this
5637// address extends the lifetime of.
5638//
5639// Note that there are only two potential values being referenced here,
5640// BaseReg and ScaleReg (global addresses are always available, as are any
5641// folded immediates).
5642Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5643
5644// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5645// lifetime wasn't extended by adding this instruction.
5646if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5647 BaseReg =nullptr;
5648if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5649 ScaledReg =nullptr;
5650
5651// If folding this instruction (and its subexprs) didn't extend any live
5652// ranges, we're OK with it.
5653if (!BaseReg && !ScaledReg)
5654returntrue;
5655
5656// If all uses of this instruction can have the address mode sunk into them,
5657// we can remove the addressing mode and effectively trade one live register
5658// for another (at worst.) In this context, folding an addressing mode into
5659// the use is just a particularly nice way of sinking it.
5660SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5661if (FindAllMemoryUses(I, MemoryUses, TLI,TRI, OptSize, PSI, BFI))
5662returnfalse;// Has a non-memory, non-foldable use!
5663
5664// Now that we know that all uses of this instruction are part of a chain of
5665// computation involving only operations that could theoretically be folded
5666// into a memory use, loop over each of these memory operation uses and see
5667// if they could *actually* fold the instruction. The assumption is that
5668// addressing modes are cheap and that duplicating the computation involved
5669// many times is worthwhile, even on a fast path. For sinking candidates
5670// (i.e. cold call sites), this serves as a way to prevent excessive code
5671// growth since most architectures have some reasonably small and fast way to
5672// compute an effective address (e.g., LEA on x86).
5673SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5674for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5675Value *Address = Pair.first->get();
5676Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5677Type *AddressAccessTy = Pair.second;
5678unsigned AS =Address->getType()->getPointerAddressSpace();
5679
5680// Do a match against the root of this address, ignoring profitability. This
5681// will tell us if the addressing mode for the memory operation will
5682// *actually* cover the shared instruction.
5683ExtAddrModeResult;
5684 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5685 0);
5686 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5687 TPT.getRestorationPoint();
5688 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI,TRI, LI, getDTFn,
5689 AddressAccessTy, AS, UserI, Result,
5690 InsertedInsts, PromotedInsts, TPT,
5691 LargeOffsetGEP, OptSize, PSI, BFI);
5692 Matcher.IgnoreProfitability =true;
5693boolSuccess = Matcher.matchAddr(Address, 0);
5694 (void)Success;
5695assert(Success &&"Couldn't select *anything*?");
5696
5697// The match was only to check profitability; the changes made are not
5698// part of the original matcher. Therefore, they should be dropped,
5699// otherwise the original matcher would not reflect the right state.
5700 TPT.rollback(LastKnownGood);
5701
5702// If the match didn't cover I, then it won't be shared by it.
5703if (!is_contained(MatchedAddrModeInsts,I))
5704returnfalse;
5705
5706 MatchedAddrModeInsts.clear();
5707 }
5708
5709returntrue;
5710}
5711
5712/// Return true if the specified values are defined in a
5713/// different basic block than BB.
5714staticboolIsNonLocalValue(Value *V,BasicBlock *BB) {
5715if (Instruction *I = dyn_cast<Instruction>(V))
5716returnI->getParent() != BB;
5717returnfalse;
5718}
5719
5720/// Sink addressing mode computation immediately before MemoryInst if doing so
5721/// can be done without increasing register pressure. The need for the
5722/// register pressure constraint means this can end up being an all or nothing
5723/// decision for all uses of the same addressing computation.
5724///
5725/// Load and Store Instructions often have addressing modes that can do
5726/// significant amounts of computation. As such, instruction selection will try
5727/// to get the load or store to do as much computation as possible for the
5728/// program. The problem is that isel can only see within a single block. As
5729/// such, we sink as much legal addressing mode work into the block as possible.
5730///
5731/// This method is used to optimize both load/store and inline asms with memory
5732/// operands. It's also used to sink addressing computations feeding into cold
5733/// call sites into their (cold) basic block.
5734///
5735/// The motivation for handling sinking into cold blocks is that doing so can
5736/// both enable other address mode sinking (by satisfying the register pressure
5737/// constraint above), and reduce register pressure globally (by removing the
5738/// addressing mode computation from the fast path entirely).
5739bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst,Value *Addr,
5740Type *AccessTy,unsigned AddrSpace) {
5741Value *Repl =Addr;
5742
5743// Try to collapse single-value PHI nodes. This is necessary to undo
5744// unprofitable PRE transformations.
5745SmallVector<Value *, 8> worklist;
5746SmallPtrSet<Value *, 16> Visited;
5747 worklist.push_back(Addr);
5748
5749// Use a worklist to iteratively look through PHI and select nodes, and
5750// ensure that the addressing mode obtained from the non-PHI/select roots of
5751// the graph are compatible.
5752bool PhiOrSelectSeen =false;
5753SmallVector<Instruction *, 16> AddrModeInsts;
5754constSimplifyQuery SQ(*DL, TLInfo);
5755 AddressingModeCombiner AddrModes(SQ,Addr);
5756 TypePromotionTransaction TPT(RemovedInsts);
5757 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5758 TPT.getRestorationPoint();
5759while (!worklist.empty()) {
5760Value *V = worklist.pop_back_val();
5761
5762// We allow traversing cyclic Phi nodes.
5763// If this loop succeeds, we ensure that every path through the Phi nodes
5764// ends up computing an address of the form
5765// BaseGV + Base + Scale * Index + Offset
5766// where Scale and Offset are constants and BaseGV, Base and Index
5767// are exactly the same Values in all cases.
5768// It means that BaseGV, Scale and Offset dominate our memory instruction
5769// and have the same value as they had in address computation represented
5770// as Phi. So we can safely sink address computation to memory instruction.
5771if (!Visited.insert(V).second)
5772continue;
5773
5774// For a PHI node, push all of its incoming values.
5775if (PHINode *P = dyn_cast<PHINode>(V)) {
5776append_range(worklist,P->incoming_values());
5777 PhiOrSelectSeen =true;
5778continue;
5779 }
5780// Similar for select.
5781if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5782 worklist.push_back(SI->getFalseValue());
5783 worklist.push_back(SI->getTrueValue());
5784 PhiOrSelectSeen =true;
5785continue;
5786 }
5787
5788// For non-PHIs, determine the addressing mode being computed. Note that
5789// the result may differ depending on what other uses our candidate
5790// addressing instructions might have.
5791 AddrModeInsts.clear();
5792 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5793 0);
5794// Defer the query (and possible computation of) the dom tree to point of
5795// actual use. It's expected that most address matches don't actually need
5796// the domtree.
5797auto getDTFn = [MemoryInst,this]() ->constDominatorTree & {
5798Function *F = MemoryInst->getParent()->getParent();
5799return this->getDT(*F);
5800 };
5801ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5802 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5803 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5804BFI.get());
5805
5806GetElementPtrInst *GEP = LargeOffsetGEP.first;
5807if (GEP && !NewGEPBases.count(GEP)) {
5808// If splitting the underlying data structure can reduce the offset of a
5809// GEP, collect the GEP. Skip the GEPs that are the new bases of
5810// previously split data structures.
5811 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5812 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5813 }
5814
5815 NewAddrMode.OriginalValue =V;
5816if (!AddrModes.addNewAddrMode(NewAddrMode))
5817break;
5818 }
5819
5820// Try to combine the AddrModes we've collected. If we couldn't collect any,
5821// or we have multiple but either couldn't combine them or combining them
5822// wouldn't do anything useful, bail out now.
5823if (!AddrModes.combineAddrModes()) {
5824 TPT.rollback(LastKnownGood);
5825returnfalse;
5826 }
5827boolModified = TPT.commit();
5828
5829// Get the combined AddrMode (or the only AddrMode, if we only had one).
5830ExtAddrModeAddrMode = AddrModes.getAddrMode();
5831
5832// If all the instructions matched are already in this BB, don't do anything.
5833// If we saw a Phi node then it is definitely not local, and if we saw a
5834// select then we want to push the address calculation past it even if it's
5835// already in this BB.
5836if (!PhiOrSelectSeen &&none_of(AddrModeInsts, [&](Value *V) {
5837returnIsNonLocalValue(V, MemoryInst->getParent());
5838 })) {
5839LLVM_DEBUG(dbgs() <<"CGP: Found local addrmode: " <<AddrMode
5840 <<"\n");
5841returnModified;
5842 }
5843
5844// Insert this computation right after this user. Since our caller is
5845// scanning from the top of the BB to the bottom, reuses of the expr are
5846// guaranteed to happen later.
5847IRBuilder<> Builder(MemoryInst);
5848
5849// Now that we have determined the addressing expression we want to use and
5850// know that we have to sink it into this block, check to see if we have
5851// already done this for some other load/store instr in this block. If so, reuse
5852// the computation. Before attempting reuse, check if the address is valid
5853// as it may have been erased.
5854
5855WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5856
5857Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH :nullptr;
5858Type *IntPtrTy =DL->getIntPtrType(Addr->getType());
5859if (SunkAddr) {
5860LLVM_DEBUG(dbgs() <<"CGP: Reusing nonlocal addrmode: " <<AddrMode
5861 <<" for " << *MemoryInst <<"\n");
5862if (SunkAddr->getType() !=Addr->getType()) {
5863if (SunkAddr->getType()->getPointerAddressSpace() !=
5864Addr->getType()->getPointerAddressSpace() &&
5865 !DL->isNonIntegralPointerType(Addr->getType())) {
5866// There are two reasons the address spaces might not match: a no-op
5867// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5868// ptrtoint/inttoptr pair to ensure we match the original semantics.
5869// TODO: allow bitcast between different address space pointers with the
5870// same size.
5871 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy,"sunkaddr");
5872 SunkAddr =
5873 Builder.CreateIntToPtr(SunkAddr,Addr->getType(),"sunkaddr");
5874 }else
5875 SunkAddr = Builder.CreatePointerCast(SunkAddr,Addr->getType());
5876 }
5877 }elseif (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5878 SubtargetInfo->addrSinkUsingGEPs())) {
5879// By default, we use the GEP-based method when AA is used later. This
5880// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5881LLVM_DEBUG(dbgs() <<"CGP: SINKING nonlocal addrmode: " <<AddrMode
5882 <<" for " << *MemoryInst <<"\n");
5883Value *ResultPtr =nullptr, *ResultIndex =nullptr;
5884
5885// First, find the pointer.
5886if (AddrMode.BaseReg &&AddrMode.BaseReg->getType()->isPointerTy()) {
5887 ResultPtr =AddrMode.BaseReg;
5888AddrMode.BaseReg =nullptr;
5889 }
5890
5891if (AddrMode.Scale &&AddrMode.ScaledReg->getType()->isPointerTy()) {
5892// We can't add more than one pointer together, nor can we scale a
5893// pointer (both of which seem meaningless).
5894if (ResultPtr ||AddrMode.Scale != 1)
5895returnModified;
5896
5897 ResultPtr =AddrMode.ScaledReg;
5898AddrMode.Scale = 0;
5899 }
5900
5901// It is only safe to sign extend the BaseReg if we know that the math
5902// required to create it did not overflow before we extend it. Since
5903// the original IR value was tossed in favor of a constant back when
5904// the AddrMode was created we need to bail out gracefully if widths
5905// do not match instead of extending it.
5906//
5907// (See below for code to add the scale.)
5908if (AddrMode.Scale) {
5909Type *ScaledRegTy =AddrMode.ScaledReg->getType();
5910if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5911 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5912returnModified;
5913 }
5914
5915GlobalValue *BaseGV =AddrMode.BaseGV;
5916if (BaseGV !=nullptr) {
5917if (ResultPtr)
5918returnModified;
5919
5920if (BaseGV->isThreadLocal()) {
5921 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5922 }else {
5923 ResultPtr = BaseGV;
5924 }
5925 }
5926
5927// If the real base value actually came from an inttoptr, then the matcher
5928// will look through it and provide only the integer value. In that case,
5929// use it here.
5930if (!DL->isNonIntegralPointerType(Addr->getType())) {
5931if (!ResultPtr &&AddrMode.BaseReg) {
5932 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg,Addr->getType(),
5933"sunkaddr");
5934AddrMode.BaseReg =nullptr;
5935 }elseif (!ResultPtr &&AddrMode.Scale == 1) {
5936 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg,Addr->getType(),
5937"sunkaddr");
5938AddrMode.Scale = 0;
5939 }
5940 }
5941
5942if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5943 !AddrMode.BaseOffs) {
5944 SunkAddr =Constant::getNullValue(Addr->getType());
5945 }elseif (!ResultPtr) {
5946returnModified;
5947 }else {
5948Type *I8PtrTy =
5949 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5950
5951// Start with the base register. Do this first so that subsequent address
5952// matching finds it last, which will prevent it from trying to match it
5953// as the scaled value in case it happens to be a mul. That would be
5954// problematic if we've sunk a different mul for the scale, because then
5955// we'd end up sinking both muls.
5956if (AddrMode.BaseReg) {
5957Value *V =AddrMode.BaseReg;
5958if (V->getType() != IntPtrTy)
5959V = Builder.CreateIntCast(V, IntPtrTy,/*isSigned=*/true,"sunkaddr");
5960
5961 ResultIndex =V;
5962 }
5963
5964// Add the scale value.
5965if (AddrMode.Scale) {
5966Value *V =AddrMode.ScaledReg;
5967if (V->getType() == IntPtrTy) {
5968// done.
5969 }else {
5970assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5971 cast<IntegerType>(V->getType())->getBitWidth() &&
5972"We can't transform if ScaledReg is too narrow");
5973V = Builder.CreateTrunc(V, IntPtrTy,"sunkaddr");
5974 }
5975
5976if (AddrMode.Scale != 1)
5977V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy,AddrMode.Scale),
5978"sunkaddr");
5979if (ResultIndex)
5980 ResultIndex = Builder.CreateAdd(ResultIndex, V,"sunkaddr");
5981else
5982 ResultIndex =V;
5983 }
5984
5985// Add in the Base Offset if present.
5986if (AddrMode.BaseOffs) {
5987Value *V = ConstantInt::get(IntPtrTy,AddrMode.BaseOffs);
5988if (ResultIndex) {
5989// We need to add this separately from the scale above to help with
5990// SDAG consecutive load/store merging.
5991if (ResultPtr->getType() != I8PtrTy)
5992 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5993 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex,"sunkaddr",
5994AddrMode.InBounds);
5995 }
5996
5997 ResultIndex =V;
5998 }
5999
6000if (!ResultIndex) {
6001 SunkAddr = ResultPtr;
6002 }else {
6003if (ResultPtr->getType() != I8PtrTy)
6004 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6005 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex,"sunkaddr",
6006AddrMode.InBounds);
6007 }
6008
6009if (SunkAddr->getType() !=Addr->getType()) {
6010if (SunkAddr->getType()->getPointerAddressSpace() !=
6011Addr->getType()->getPointerAddressSpace() &&
6012 !DL->isNonIntegralPointerType(Addr->getType())) {
6013// There are two reasons the address spaces might not match: a no-op
6014// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6015// ptrtoint/inttoptr pair to ensure we match the original semantics.
6016// TODO: allow bitcast between different address space pointers with
6017// the same size.
6018 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy,"sunkaddr");
6019 SunkAddr =
6020 Builder.CreateIntToPtr(SunkAddr,Addr->getType(),"sunkaddr");
6021 }else
6022 SunkAddr = Builder.CreatePointerCast(SunkAddr,Addr->getType());
6023 }
6024 }
6025 }else {
6026// We'd require a ptrtoint/inttoptr down the line, which we can't do for
6027// non-integral pointers, so in that case bail out now.
6028Type *BaseTy =AddrMode.BaseReg ?AddrMode.BaseReg->getType() :nullptr;
6029Type *ScaleTy =AddrMode.Scale ?AddrMode.ScaledReg->getType() :nullptr;
6030PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6031PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6032if (DL->isNonIntegralPointerType(Addr->getType()) ||
6033 (BasePtrTy &&DL->isNonIntegralPointerType(BasePtrTy)) ||
6034 (ScalePtrTy &&DL->isNonIntegralPointerType(ScalePtrTy)) ||
6035 (AddrMode.BaseGV &&
6036DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6037returnModified;
6038
6039LLVM_DEBUG(dbgs() <<"CGP: SINKING nonlocal addrmode: " <<AddrMode
6040 <<" for " << *MemoryInst <<"\n");
6041Type *IntPtrTy =DL->getIntPtrType(Addr->getType());
6042Value *Result =nullptr;
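// A rough sketch of what is materialized below (hypothetical value names;
// each term is optional and only emitted when the corresponding AddrMode
// field is set):
//   sunkaddr = inttoptr(BaseReg + ScaledReg * Scale + BaseGV + BaseOffs)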
6043
6044// Start with the base register. Do this first so that subsequent address
6045// matching finds it last, which will prevent it from trying to match it
6046// as the scaled value in case it happens to be a mul. That would be
6047// problematic if we've sunk a different mul for the scale, because then
6048// we'd end up sinking both muls.
6049if (AddrMode.BaseReg) {
6050Value *V =AddrMode.BaseReg;
6051if (V->getType()->isPointerTy())
6052V = Builder.CreatePtrToInt(V, IntPtrTy,"sunkaddr");
6053if (V->getType() != IntPtrTy)
6054V = Builder.CreateIntCast(V, IntPtrTy,/*isSigned=*/true,"sunkaddr");
6055Result =V;
6056 }
6057
6058// Add the scale value.
6059if (AddrMode.Scale) {
6060Value *V =AddrMode.ScaledReg;
6061if (V->getType() == IntPtrTy) {
6062// done.
6063 }elseif (V->getType()->isPointerTy()) {
6064V = Builder.CreatePtrToInt(V, IntPtrTy,"sunkaddr");
6065 }elseif (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6066 cast<IntegerType>(V->getType())->getBitWidth()) {
6067V = Builder.CreateTrunc(V, IntPtrTy,"sunkaddr");
6068 }else {
6069// It is only safe to sign extend the BaseReg if we know that the math
6070// required to create it did not overflow before we extend it. Since
6071// the original IR value was tossed in favor of a constant back when
6072// the AddrMode was created we need to bail out gracefully if widths
6073// do not match instead of extending it.
6074Instruction *I = dyn_cast_or_null<Instruction>(Result);
6075if (I && (Result !=AddrMode.BaseReg))
6076I->eraseFromParent();
6077returnModified;
6078 }
6079if (AddrMode.Scale != 1)
6080V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy,AddrMode.Scale),
6081"sunkaddr");
6082if (Result)
6083Result = Builder.CreateAdd(Result, V,"sunkaddr");
6084else
6085Result =V;
6086 }
6087
6088// Add in the BaseGV if present.
6089GlobalValue *BaseGV =AddrMode.BaseGV;
6090if (BaseGV !=nullptr) {
6091Value *BaseGVPtr;
6092if (BaseGV->isThreadLocal()) {
6093 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6094 }else {
6095 BaseGVPtr = BaseGV;
6096 }
6097Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy,"sunkaddr");
6098if (Result)
6099Result = Builder.CreateAdd(Result, V,"sunkaddr");
6100else
6101Result =V;
6102 }
6103
6104// Add in the Base Offset if present.
6105if (AddrMode.BaseOffs) {
6106Value *V = ConstantInt::get(IntPtrTy,AddrMode.BaseOffs);
6107if (Result)
6108Result = Builder.CreateAdd(Result, V,"sunkaddr");
6109else
6110Result =V;
6111 }
6112
6113if (!Result)
6114 SunkAddr =Constant::getNullValue(Addr->getType());
6115else
6116 SunkAddr = Builder.CreateIntToPtr(Result,Addr->getType(),"sunkaddr");
6117 }
6118
6119 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6120// Store the newly computed address into the cache. In the case we reused a
6121// value, this should be idempotent.
6122 SunkAddrs[Addr] =WeakTrackingVH(SunkAddr);
6123
6124// If we have no uses, recursively delete the value and all dead instructions
6125// using it.
6126if (Repl->use_empty()) {
6127 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6128 RecursivelyDeleteTriviallyDeadInstructions(
6129 Repl, TLInfo, nullptr,
6130 [&](Value *V) { removeAllAssertingVHReferences(V); });
6131 });
6132 }
6133 ++NumMemoryInsts;
6134returntrue;
6135}
6136
6137/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6138/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6139/// only handle a 2 operand GEP in the same basic block or a splat constant
6140/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6141/// index.
6142///
6143/// If the existing GEP has a vector base pointer that is splat, we can look
6144/// through the splat to find the scalar pointer. If we can't find a scalar
6145/// pointer there's nothing we can do.
6146///
6147/// If we have a GEP with more than 2 indices where the middle indices are all
6148/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6149///
6150/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6151/// followed by a GEP with an all zeroes vector index. This will enable
6152/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6153/// zero index.
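///
/// For example (illustrative IR only, not from a specific test): if the
/// pointer operand is a splat of a scalar pointer %p built from
/// insertelement + shufflevector, the address can be rewritten as
///   %addr = getelementptr i32, ptr %p, <4 x i64> zeroinitializer
/// which SelectionDAGBuilder then recognizes as a uniform base with a zero
/// index.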
6154bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6155Value *Ptr) {
6156Value *NewAddr;
6157
6158if (constauto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6159// Don't optimize GEPs that don't have indices.
6160if (!GEP->hasIndices())
6161returnfalse;
6162
6163// If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6164// FIXME: We should support this by sinking the GEP.
6165if (MemoryInst->getParent() !=GEP->getParent())
6166returnfalse;
6167
6168SmallVector<Value *, 2> Ops(GEP->operands());
6169
6170bool RewriteGEP =false;
6171
6172if (Ops[0]->getType()->isVectorTy()) {
6173 Ops[0] =getSplatValue(Ops[0]);
6174if (!Ops[0])
6175returnfalse;
6176 RewriteGEP =true;
6177 }
6178
6179unsigned FinalIndex = Ops.size() - 1;
6180
6181// Ensure all indices but the last are 0.
6182// FIXME: This isn't strictly required. All that's required is that they are
6183// all scalars or splats.
6184for (unsigned i = 1; i < FinalIndex; ++i) {
6185auto *C = dyn_cast<Constant>(Ops[i]);
6186if (!C)
6187returnfalse;
6188if (isa<VectorType>(C->getType()))
6189C =C->getSplatValue();
6190auto *CI = dyn_cast_or_null<ConstantInt>(C);
6191if (!CI || !CI->isZero())
6192returnfalse;
6193// Scalarize the index if needed.
6194 Ops[i] = CI;
6195 }
6196
6197// Try to scalarize the final index.
6198if (Ops[FinalIndex]->getType()->isVectorTy()) {
6199if (Value *V =getSplatValue(Ops[FinalIndex])) {
6200auto *C = dyn_cast<ConstantInt>(V);
6201// Don't scalarize all zeros vector.
6202if (!C || !C->isZero()) {
6203 Ops[FinalIndex] =V;
6204 RewriteGEP =true;
6205 }
6206 }
6207 }
6208
6209// If we made any changes or we have extra operands, we need to generate
6210// new instructions.
6211if (!RewriteGEP && Ops.size() == 2)
6212returnfalse;
6213
6214auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6215
6216IRBuilder<> Builder(MemoryInst);
6217
6218Type *SourceTy =GEP->getSourceElementType();
6219Type *ScalarIndexTy =DL->getIndexType(Ops[0]->getType()->getScalarType());
6220
6221// If the final index isn't a vector, emit a scalar GEP containing all ops
6222// and a vector GEP with all zeroes final index.
6223if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6224 NewAddr = Builder.CreateGEP(SourceTy, Ops[0],ArrayRef(Ops).drop_front());
6225auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6226auto *SecondTy =GetElementPtrInst::getIndexedType(
6227 SourceTy,ArrayRef(Ops).drop_front());
6228 NewAddr =
6229 Builder.CreateGEP(SecondTy, NewAddr,Constant::getNullValue(IndexTy));
6230 }else {
6231Value *Base = Ops[0];
6232Value *Index = Ops[FinalIndex];
6233
6234// Create a scalar GEP if there are more than 2 operands.
6235if (Ops.size() != 2) {
6236// Replace the last index with 0.
6237 Ops[FinalIndex] =
6238Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6239Base = Builder.CreateGEP(SourceTy,Base,ArrayRef(Ops).drop_front());
6240 SourceTy =GetElementPtrInst::getIndexedType(
6241 SourceTy,ArrayRef(Ops).drop_front());
6242 }
6243
6244// Now create the GEP with scalar pointer and vector index.
6245 NewAddr = Builder.CreateGEP(SourceTy,Base, Index);
6246 }
6247 }elseif (!isa<Constant>(Ptr)) {
6248// Not a GEP, maybe it's a splat and we can create a GEP to enable
6249// SelectionDAGBuilder to use it as a uniform base.
6250Value *V =getSplatValue(Ptr);
6251if (!V)
6252returnfalse;
6253
6254auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6255
6256IRBuilder<> Builder(MemoryInst);
6257
6258// Emit a vector GEP with a scalar pointer and all 0s vector index.
6259Type *ScalarIndexTy =DL->getIndexType(V->getType()->getScalarType());
6260auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6261Type *ScalarTy;
6262if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6263 Intrinsic::masked_gather) {
6264 ScalarTy = MemoryInst->getType()->getScalarType();
6265 }else {
6266assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6267 Intrinsic::masked_scatter);
6268 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6269 }
6270 NewAddr = Builder.CreateGEP(ScalarTy, V,Constant::getNullValue(IndexTy));
6271 }else {
6272// Constant, SelectionDAGBuilder knows to check if it's a splat.
6273returnfalse;
6274 }
6275
6276 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6277
6278// If we have no uses, recursively delete the value and all dead instructions
6279// using it.
6280if (Ptr->use_empty())
6281RecursivelyDeleteTriviallyDeadInstructions(
6282Ptr, TLInfo,nullptr,
6283 [&](Value *V) { removeAllAssertingVHReferences(V); });
6284
6285returntrue;
6286}
6287
6288/// If there are any memory operands, use OptimizeMemoryInst to sink their
6289/// address computation into the block when possible / profitable.
6290bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6291bool MadeChange =false;
6292
6293constTargetRegisterInfo *TRI =
6294TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6295TargetLowering::AsmOperandInfoVector TargetConstraints =
6296 TLI->ParseConstraints(*DL,TRI, *CS);
6297unsigned ArgNo = 0;
6298for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6299// Compute the constraint code and ConstraintType to use.
6300 TLI->ComputeConstraintToUse(OpInfo,SDValue());
6301
6302// TODO: Also handle C_Address?
6303if (OpInfo.ConstraintType ==TargetLowering::C_Memory &&
6304 OpInfo.isIndirect) {
6305Value *OpVal = CS->getArgOperand(ArgNo++);
6306 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6307 }elseif (OpInfo.Type ==InlineAsm::isInput)
6308 ArgNo++;
6309 }
6310
6311return MadeChange;
6312}
6313
6314/// Check if all the uses of \p Val are equivalent (or free) zero or
6315/// sign extensions.
6316staticboolhasSameExtUse(Value *Val,constTargetLowering &TLI) {
6317assert(!Val->use_empty() &&"Input must have at least one use");
6318constInstruction *FirstUser = cast<Instruction>(*Val->user_begin());
6319bool IsSExt = isa<SExtInst>(FirstUser);
6320Type *ExtTy = FirstUser->getType();
6321for (constUser *U : Val->users()) {
6322constInstruction *UI = cast<Instruction>(U);
6323if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6324returnfalse;
6325Type *CurTy = UI->getType();
6326// Same input and output types: Same instruction after CSE.
6327if (CurTy == ExtTy)
6328continue;
6329
6330// If IsSExt is true, we are in this situation:
6331// a = Val
6332// b = sext ty1 a to ty2
6333// c = sext ty1 a to ty3
6334// Assuming ty2 is shorter than ty3, this could be turned into:
6335// a = Val
6336// b = sext ty1 a to ty2
6337// c = sext ty2 b to ty3
6338// However, the last sext is not free.
6339if (IsSExt)
6340returnfalse;
6341
6342// This is a ZExt, maybe this is free to extend from one type to another.
6343// In that case, we would not account for a different use.
6344Type *NarrowTy;
6345Type *LargeTy;
6346if (ExtTy->getScalarType()->getIntegerBitWidth() >
6347 CurTy->getScalarType()->getIntegerBitWidth()) {
6348 NarrowTy = CurTy;
6349 LargeTy = ExtTy;
6350 }else {
6351 NarrowTy = ExtTy;
6352 LargeTy = CurTy;
6353 }
6354
6355if (!TLI.isZExtFree(NarrowTy, LargeTy))
6356returnfalse;
6357 }
6358// All uses are the same or can be derived from one another for free.
6359returntrue;
6360}
6361
6362/// Try to speculatively promote extensions in \p Exts and continue
6363/// promoting through newly promoted operands recursively as far as doing so is
6364/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6365/// When some promotion happened, \p TPT contains the proper state to revert
6366/// them.
6367///
6368/// \return true if some promotion happened, false otherwise.
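///
/// A typical promotion (illustrative only) rewrites
///   %a = add nsw i32 %b, 1
///   %z = sext i32 %a to i64
/// into
///   %b.ext = sext i32 %b to i64
///   %z     = add nsw i64 %b.ext, 1
/// so that the remaining extension sits directly on an operand (ideally a
/// load) where it can later be folded.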
6369bool CodeGenPrepare::tryToPromoteExts(
6370 TypePromotionTransaction &TPT,constSmallVectorImpl<Instruction *> &Exts,
6371SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6372unsigned CreatedInstsCost) {
6373bool Promoted =false;
6374
6375// Iterate over all the extensions to try to promote them.
6376for (auto *I : Exts) {
6377// Early check if we directly have ext(load).
6378if (isa<LoadInst>(I->getOperand(0))) {
6379 ProfitablyMovedExts.push_back(I);
6380continue;
6381 }
6382
6383// Check whether or not we want to do any promotion. The reason we have
6384// this check inside the for loop is to catch the case where an extension
6385// is directly fed by a load, because in such a case the extension can be moved
6386// up without any promotion on its operands.
6387if (!TLI->enableExtLdPromotion() ||DisableExtLdPromotion)
6388returnfalse;
6389
6390// Get the action to perform the promotion.
6391 TypePromotionHelper::Action TPH =
6392 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6393// Check if we can promote.
6394if (!TPH) {
6395// Save the current extension as we cannot move up through its operand.
6396 ProfitablyMovedExts.push_back(I);
6397continue;
6398 }
6399
6400// Save the current state.
6401 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6402 TPT.getRestorationPoint();
6403SmallVector<Instruction *, 4> NewExts;
6404unsigned NewCreatedInstsCost = 0;
6405unsigned ExtCost = !TLI->isExtFree(I);
6406// Promote.
6407Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6408 &NewExts,nullptr, *TLI);
6409assert(PromotedVal &&
6410"TypePromotionHelper should have filtered out those cases");
6411
6412// We would be able to merge only one extension into a load.
6413// Therefore, if we have more than 1 new extension we heuristically
6414// cut this search path, because it means we degrade the code quality.
6415// With exactly 2, the transformation is neutral, because we will merge
6416// one extension but leave one. However, we optimistically keep going,
6417// because the new extension may be removed too. Also avoid replacing a
6418// single free extension with multiple extensions, as this increases the
6419// number of IR instructions while not providing any savings.
6420longlong TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6421// FIXME: It would be possible to propagate a negative value instead of
6422// conservatively ceiling it to 0.
6423 TotalCreatedInstsCost =
6424 std::max((longlong)0, (TotalCreatedInstsCost - ExtCost));
6425if (!StressExtLdPromotion &&
6426 (TotalCreatedInstsCost > 1 ||
6427 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6428 (ExtCost == 0 && NewExts.size() > 1))) {
6429// This promotion is not profitable, rollback to the previous state, and
6430// save the current extension in ProfitablyMovedExts as the latest
6431// speculative promotion turned out to be unprofitable.
6432 TPT.rollback(LastKnownGood);
6433 ProfitablyMovedExts.push_back(I);
6434continue;
6435 }
6436// Continue promoting NewExts as far as doing so is profitable.
6437SmallVector<Instruction *, 2> NewlyMovedExts;
6438 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6439bool NewPromoted =false;
6440for (auto *ExtInst : NewlyMovedExts) {
6441Instruction *MovedExt = cast<Instruction>(ExtInst);
6442Value *ExtOperand = MovedExt->getOperand(0);
6443// If we have reached a load, we need this extra profitability check
6444// as it could potentially be merged into an ext(load).
6445if (isa<LoadInst>(ExtOperand) &&
6446 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6447 (ExtOperand->hasOneUse() ||hasSameExtUse(ExtOperand, *TLI))))
6448continue;
6449
6450 ProfitablyMovedExts.push_back(MovedExt);
6451 NewPromoted =true;
6452 }
6453
6454// If none of speculative promotions for NewExts is profitable, rollback
6455// and save the current extension (I) as the last profitable extension.
6456if (!NewPromoted) {
6457 TPT.rollback(LastKnownGood);
6458 ProfitablyMovedExts.push_back(I);
6459continue;
6460 }
6461// The promotion is profitable.
6462 Promoted =true;
6463 }
6464return Promoted;
6465}
6466
6467/// Merging redundant sexts when one is dominating the other.
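/// For instance (illustrative only), given
///   bb1:  %s1 = sext i32 %x to i64    ; bb1 dominates bb2
///   bb2:  %s2 = sext i32 %x to i64
/// the dominated %s2 is replaced by %s1 and marked for removal.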
6468bool CodeGenPrepare::mergeSExts(Function &F) {
6469bool Changed =false;
6470for (auto &Entry : ValToSExtendedUses) {
6471 SExts &Insts =Entry.second;
6472 SExts CurPts;
6473for (Instruction *Inst : Insts) {
6474if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6475 Inst->getOperand(0) !=Entry.first)
6476continue;
6477bool inserted =false;
6478for (auto &Pt : CurPts) {
6479if (getDT(F).dominates(Inst, Pt)) {
6480replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6481 RemovedInsts.insert(Pt);
6482 Pt->removeFromParent();
6483 Pt = Inst;
6484 inserted =true;
6485 Changed =true;
6486break;
6487 }
6488if (!getDT(F).dominates(Pt, Inst))
6489// Give up if we need to merge in a common dominator as the
6490// experiments show it is not profitable.
6491continue;
6492replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6493 RemovedInsts.insert(Inst);
6494 Inst->removeFromParent();
6495 inserted =true;
6496 Changed =true;
6497break;
6498 }
6499if (!inserted)
6500 CurPts.push_back(Inst);
6501 }
6502 }
6503return Changed;
6504}
6505
6506// Splitting large data structures so that the GEPs accessing them can have
6507// smaller offsets so that they can be sunk to the same blocks as their users.
6508// For example, a large struct starting from %base is split into two parts
6509// where the second part starts from %new_base.
6510//
6511// Before:
6512// BB0:
6513// %base =
6514//
6515// BB1:
6516// %gep0 = gep %base, off0
6517// %gep1 = gep %base, off1
6518// %gep2 = gep %base, off2
6519//
6520// BB2:
6521// %load1 = load %gep0
6522// %load2 = load %gep1
6523// %load3 = load %gep2
6524//
6525// After:
6526// BB0:
6527// %base =
6528// %new_base = gep %base, off0
6529//
6530// BB1:
6531// %new_gep0 = %new_base
6532// %new_gep1 = gep %new_base, off1 - off0
6533// %new_gep2 = gep %new_base, off2 - off0
6534//
6535// BB2:
6536// %load1 = load i32, i32* %new_gep0
6537// %load2 = load i32, i32* %new_gep1
6538// %load3 = load i32, i32* %new_gep2
6539//
6540// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6541// their offsets are small enough to fit into the addressing mode.
6542bool CodeGenPrepare::splitLargeGEPOffsets() {
6543bool Changed =false;
6544for (auto &Entry : LargeOffsetGEPMap) {
6545Value *OldBase =Entry.first;
6546SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6547 &LargeOffsetGEPs =Entry.second;
6548auto compareGEPOffset =
6549 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6550const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6551if (LHS.first ==RHS.first)
6552returnfalse;
6553if (LHS.second !=RHS.second)
6554returnLHS.second <RHS.second;
6555return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6556 };
6557// Sorting all the GEPs of the same data structures based on the offsets.
6558llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6559 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6560// Skip if all the GEPs have the same offsets.
6561if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6562continue;
6563GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6564 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6565Value *NewBaseGEP =nullptr;
6566
6567auto createNewBase = [&](int64_t BaseOffset,Value *OldBase,
6568GetElementPtrInst *GEP) {
6569LLVMContext &Ctx =GEP->getContext();
6570Type *PtrIdxTy =DL->getIndexType(GEP->getType());
6571Type *I8PtrTy =
6572 PointerType::get(Ctx,GEP->getType()->getPointerAddressSpace());
6573
6574BasicBlock::iterator NewBaseInsertPt;
6575BasicBlock *NewBaseInsertBB;
6576if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6577// If the base of the struct is an instruction, the new base will be
6578// inserted close to it.
6579 NewBaseInsertBB = BaseI->getParent();
6580if (isa<PHINode>(BaseI))
6581 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6582elseif (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6583 NewBaseInsertBB =
6584SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6585 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6586 }else
6587 NewBaseInsertPt = std::next(BaseI->getIterator());
6588 }else {
6589// If the current base is an argument or global value, the new base
6590// will be inserted to the entry block.
6591 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6592 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6593 }
6594IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6595// Create a new base.
6596Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6597 NewBaseGEP = OldBase;
6598if (NewBaseGEP->getType() != I8PtrTy)
6599 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6600 NewBaseGEP =
6601 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex,"splitgep");
6602 NewGEPBases.insert(NewBaseGEP);
6603return;
6604 };
6605
6606// Check whether all the offsets can be encoded with the preferred common base.
6607if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6608 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6609 BaseOffset = PreferBase;
6610// Create a new base if the offset of the BaseGEP can be decoded with one
6611// instruction.
6612 createNewBase(BaseOffset, OldBase, BaseGEP);
6613 }
6614
6615auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6616while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6617GetElementPtrInst *GEP = LargeOffsetGEP->first;
6618 int64_tOffset = LargeOffsetGEP->second;
6619if (Offset != BaseOffset) {
6620TargetLowering::AddrModeAddrMode;
6621AddrMode.HasBaseReg =true;
6622AddrMode.BaseOffs =Offset - BaseOffset;
6623// The result type of the GEP might not be the type of the memory
6624// access.
6625if (!TLI->isLegalAddressingMode(*DL,AddrMode,
6626GEP->getResultElementType(),
6627GEP->getAddressSpace())) {
6628// We need to create a new base if the offset to the current base is
6629// too large to fit into the addressing mode. So, a very large struct
6630// may be split into several parts.
6631 BaseGEP =GEP;
6632 BaseOffset =Offset;
6633 NewBaseGEP =nullptr;
6634 }
6635 }
6636
6637// Generate a new GEP to replace the current one.
6638Type *PtrIdxTy =DL->getIndexType(GEP->getType());
6639
6640if (!NewBaseGEP) {
6641// Create a new base if we don't have one yet. Find the insertion
6642// point for the new base first.
6643 createNewBase(BaseOffset, OldBase,GEP);
6644 }
6645
6646IRBuilder<> Builder(GEP);
6647Value *NewGEP = NewBaseGEP;
6648if (Offset != BaseOffset) {
6649// Calculate the new offset for the new GEP.
6650Value *Index = ConstantInt::get(PtrIdxTy,Offset - BaseOffset);
6651 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6652 }
6653replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6654 LargeOffsetGEPID.erase(GEP);
6655 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6656GEP->eraseFromParent();
6657 Changed =true;
6658 }
6659 }
6660return Changed;
6661}
6662
6663bool CodeGenPrepare::optimizePhiType(
6664PHINode *I,SmallPtrSetImpl<PHINode *> &Visited,
6665SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6666// We are looking for a collection of interconnected phi nodes that together
6667// only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6668// are of the same type. Convert the whole set of nodes to the type of the
6669// bitcast.
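// Illustrative example (hypothetical IR): a phi such as
//   %p = phi float [ %a, %bb0 ], [ %b, %bb1 ]
// whose incoming values are bitcasts of i32 loads and whose only uses are
// bitcasts back to i32 feeding stores can be rewritten as an i32 phi,
// removing the bitcasts on both sides.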
6670Type *PhiTy =I->getType();
6671Type *ConvertTy =nullptr;
6672if (Visited.count(I) ||
6673 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6674returnfalse;
6675
6676SmallVector<Instruction *, 4> Worklist;
6677 Worklist.push_back(cast<Instruction>(I));
6678SmallPtrSet<PHINode *, 4> PhiNodes;
6679SmallPtrSet<ConstantData *, 4>Constants;
6680 PhiNodes.insert(I);
6681 Visited.insert(I);
6682SmallPtrSet<Instruction *, 4> Defs;
6683SmallPtrSet<Instruction *, 4>Uses;
6684// This works by adding extra bitcasts between load/stores and removing
6685// existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6686// we can get in the situation where we remove a bitcast in one iteration
6687// just to add it again in the next. We need to ensure that at least one
6688// bitcast we remove is anchored to something that will not change back.
6689bool AnyAnchored =false;
6690
6691while (!Worklist.empty()) {
6692Instruction *II = Worklist.pop_back_val();
6693
6694if (auto *Phi = dyn_cast<PHINode>(II)) {
6695// Handle Defs, which might also be PHI's
6696for (Value *V :Phi->incoming_values()) {
6697if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6698if (!PhiNodes.count(OpPhi)) {
6699if (!Visited.insert(OpPhi).second)
6700returnfalse;
6701 PhiNodes.insert(OpPhi);
6702 Worklist.push_back(OpPhi);
6703 }
6704 }elseif (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6705if (!OpLoad->isSimple())
6706returnfalse;
6707if (Defs.insert(OpLoad).second)
6708 Worklist.push_back(OpLoad);
6709 }elseif (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6710if (Defs.insert(OpEx).second)
6711 Worklist.push_back(OpEx);
6712 }elseif (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6713if (!ConvertTy)
6714 ConvertTy = OpBC->getOperand(0)->getType();
6715if (OpBC->getOperand(0)->getType() != ConvertTy)
6716returnfalse;
6717if (Defs.insert(OpBC).second) {
6718 Worklist.push_back(OpBC);
6719 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6720 !isa<ExtractElementInst>(OpBC->getOperand(0));
6721 }
6722 }elseif (auto *OpC = dyn_cast<ConstantData>(V))
6723Constants.insert(OpC);
6724else
6725returnfalse;
6726 }
6727 }
6728
6729// Handle uses which might also be phi's
6730for (User *V :II->users()) {
6731if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6732if (!PhiNodes.count(OpPhi)) {
6733if (Visited.count(OpPhi))
6734returnfalse;
6735 PhiNodes.insert(OpPhi);
6736 Visited.insert(OpPhi);
6737 Worklist.push_back(OpPhi);
6738 }
6739 }elseif (auto *OpStore = dyn_cast<StoreInst>(V)) {
6740if (!OpStore->isSimple() || OpStore->getOperand(0) !=II)
6741returnfalse;
6742Uses.insert(OpStore);
6743 }elseif (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6744if (!ConvertTy)
6745 ConvertTy = OpBC->getType();
6746if (OpBC->getType() != ConvertTy)
6747returnfalse;
6748Uses.insert(OpBC);
6749 AnyAnchored |=
6750any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6751 }else {
6752returnfalse;
6753 }
6754 }
6755 }
6756
6757if (!ConvertTy || !AnyAnchored ||
6758 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6759returnfalse;
6760
6761LLVM_DEBUG(dbgs() <<"Converting " << *I <<"\n and connected nodes to "
6762 << *ConvertTy <<"\n");
6763
6764// Create all the new phi nodes of the new type, and bitcast any loads to the
6765// correct type.
6766ValueToValueMap ValMap;
6767for (ConstantData *C : Constants)
6768 ValMap[C] =ConstantExpr::getBitCast(C, ConvertTy);
6769for (Instruction *D : Defs) {
6770if (isa<BitCastInst>(D)) {
6771 ValMap[D] =D->getOperand(0);
6772 DeletedInstrs.insert(D);
6773 }else {
6774BasicBlock::iterator insertPt = std::next(D->getIterator());
6775 ValMap[D] =newBitCastInst(D, ConvertTy,D->getName() +".bc", insertPt);
6776 }
6777 }
6778for (PHINode *Phi : PhiNodes)
6779 ValMap[Phi] =PHINode::Create(ConvertTy,Phi->getNumIncomingValues(),
6780Phi->getName() +".tc",Phi->getIterator());
6781// Pipe together all the PhiNodes.
6782for (PHINode *Phi : PhiNodes) {
6783PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6784for (int i = 0, e =Phi->getNumIncomingValues(); i < e; i++)
6785 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6786Phi->getIncomingBlock(i));
6787 Visited.insert(NewPhi);
6788 }
6789// And finally pipe up the stores and bitcasts
6790for (Instruction *U :Uses) {
6791if (isa<BitCastInst>(U)) {
6792 DeletedInstrs.insert(U);
6793replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6794 }else {
6795U->setOperand(0,newBitCastInst(ValMap[U->getOperand(0)], PhiTy,"bc",
6796U->getIterator()));
6797 }
6798 }
6799
6800// Save the removed phis to be deleted later.
6801for (PHINode *Phi : PhiNodes)
6802 DeletedInstrs.insert(Phi);
6803returntrue;
6804}
6805
6806bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6807if (!OptimizePhiTypes)
6808returnfalse;
6809
6810bool Changed =false;
6811SmallPtrSet<PHINode *, 4> Visited;
6812SmallPtrSet<Instruction *, 4> DeletedInstrs;
6813
6814// Attempt to optimize all the phis in the functions to the correct type.
6815for (auto &BB :F)
6816for (auto &Phi : BB.phis())
6817 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6818
6819// Remove any old phi's that have been converted.
6820for (auto *I : DeletedInstrs) {
6821replaceAllUsesWith(I,PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6822I->eraseFromParent();
6823 }
6824
6825return Changed;
6826}
6827
6828/// Return true if an ext(load) can be formed from an extension in
6829/// \p MovedExts.
6830bool CodeGenPrepare::canFormExtLd(
6831constSmallVectorImpl<Instruction *> &MovedExts,LoadInst *&LI,
6832Instruction *&Inst,bool HasPromoted) {
6833for (auto *MovedExtInst : MovedExts) {
6834if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6835 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6836 Inst = MovedExtInst;
6837break;
6838 }
6839 }
6840if (!LI)
6841returnfalse;
6842
6843// If they're already in the same block, there's nothing to do.
6844// Make the cheap checks first if we did not promote.
6845// If we promoted, we need to check if it is indeed profitable.
6846if (!HasPromoted && LI->getParent() == Inst->getParent())
6847returnfalse;
6848
6849return TLI->isExtLoad(LI, Inst, *DL);
6850}
6851
6852/// Move a zext or sext fed by a load into the same basic block as the load,
6853/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6854/// extend into the load.
6855///
6856/// E.g.,
6857/// \code
6858/// %ld = load i32* %addr
6859/// %add = add nuw i32 %ld, 4
6860/// %zext = zext i32 %add to i64
6861/// \endcode
6862/// =>
6863/// \code
6864/// %ld = load i32* %addr
6865/// %zext = zext i32 %ld to i64
6866/// %add = add nuw i64 %zext, 4
6867/// \endcode
6868/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
6869/// allows us to match zext(load i32*) to i64.
6870///
6871/// Also, try to promote the computations used to obtain a sign extended
6872/// value used in memory accesses.
6873/// E.g.,
6874/// \code
6875/// a = add nsw i32 b, 3
6876/// d = sext i32 a to i64
6877/// e = getelementptr ..., i64 d
6878/// \endcode
6879/// =>
6880/// \code
6881/// f = sext i32 b to i64
6882/// a = add nsw i64 f, 3
6883/// e = getelementptr ..., i64 a
6884/// \endcode
6885///
6886/// \p Inst[in/out] the extension may be modified during the process if some
6887/// promotions apply.
6888bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6889bool AllowPromotionWithoutCommonHeader =false;
6890 /// See if it is an interesting sext operation for the address type
6891 /// promotion before trying to promote it, e.g., the ones with the right
6892 /// type and used in memory accesses.
6893bool ATPConsiderable =TTI->shouldConsiderAddressTypePromotion(
6894 *Inst, AllowPromotionWithoutCommonHeader);
6895 TypePromotionTransaction TPT(RemovedInsts);
6896 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6897 TPT.getRestorationPoint();
6898SmallVector<Instruction *, 1> Exts;
6899SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6900 Exts.push_back(Inst);
6901
6902bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6903
6904// Look for a load being extended.
6905LoadInst *LI =nullptr;
6906Instruction *ExtFedByLoad;
6907
6908// Try to promote a chain of computation if it allows us to form an extended
6909// load.
6910if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6911assert(LI && ExtFedByLoad &&"Expect a valid load and extension");
6912 TPT.commit();
6913// Move the extend into the same block as the load.
6914 ExtFedByLoad->moveAfter(LI);
6915 ++NumExtsMoved;
6916 Inst = ExtFedByLoad;
6917returntrue;
6918 }
6919
6920// Continue promoting SExts if the target considers address type promotion worthwhile.
6921if (ATPConsiderable &&
6922 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6923 HasPromoted, TPT, SpeculativelyMovedExts))
6924returntrue;
6925
6926 TPT.rollback(LastKnownGood);
6927returnfalse;
6928}
6929
6930// Perform address type promotion if doing so is profitable.
6931// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6932// instructions that sign extended the same initial value. However, if
6933// AllowPromotionWithoutCommonHeader == true, we expect promoting the
6934// extension to be profitable by itself.
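// For example (illustrative only), if two address computations both start
// from "sext i32 %x to i64", promoting both chains exposes two identical
// sexts of %x that mergeSExts() can later combine.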
6935bool CodeGenPrepare::performAddressTypePromotion(
6936Instruction *&Inst,bool AllowPromotionWithoutCommonHeader,
6937bool HasPromoted, TypePromotionTransaction &TPT,
6938SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6939bool Promoted =false;
6940SmallPtrSet<Instruction *, 1> UnhandledExts;
6941bool AllSeenFirst =true;
6942for (auto *I : SpeculativelyMovedExts) {
6943Value *HeadOfChain =I->getOperand(0);
6944DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6945 SeenChainsForSExt.find(HeadOfChain);
6946// If there is an unhandled SExt which has the same header, try to promote
6947// it as well.
6948if (AlreadySeen != SeenChainsForSExt.end()) {
6949if (AlreadySeen->second !=nullptr)
6950 UnhandledExts.insert(AlreadySeen->second);
6951 AllSeenFirst =false;
6952 }
6953 }
6954
6955if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6956 SpeculativelyMovedExts.size() == 1)) {
6957 TPT.commit();
6958if (HasPromoted)
6959 Promoted =true;
6960for (auto *I : SpeculativelyMovedExts) {
6961Value *HeadOfChain =I->getOperand(0);
6962 SeenChainsForSExt[HeadOfChain] =nullptr;
6963 ValToSExtendedUses[HeadOfChain].push_back(I);
6964 }
6965// Update Inst as promotion happened.
6966 Inst = SpeculativelyMovedExts.pop_back_val();
6967 }else {
6968// This is the first chain visited from the header, keep the current chain
6969// as unhandled. Defer promoting this until we encounter another SExt
6970// chain derived from the same header.
6971for (auto *I : SpeculativelyMovedExts) {
6972Value *HeadOfChain =I->getOperand(0);
6973 SeenChainsForSExt[HeadOfChain] = Inst;
6974 }
6975returnfalse;
6976 }
6977
6978if (!AllSeenFirst && !UnhandledExts.empty())
6979for (auto *VisitedSExt : UnhandledExts) {
6980if (RemovedInsts.count(VisitedSExt))
6981continue;
6982 TypePromotionTransaction TPT(RemovedInsts);
6983SmallVector<Instruction *, 1> Exts;
6984SmallVector<Instruction *, 2> Chains;
6985 Exts.push_back(VisitedSExt);
6986bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6987 TPT.commit();
6988if (HasPromoted)
6989 Promoted =true;
6990for (auto *I : Chains) {
6991Value *HeadOfChain =I->getOperand(0);
6992// Mark this as handled.
6993 SeenChainsForSExt[HeadOfChain] =nullptr;
6994 ValToSExtendedUses[HeadOfChain].push_back(I);
6995 }
6996 }
6997return Promoted;
6998}
6999
7000bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7001BasicBlock *DefBB =I->getParent();
7002
7003// If the result of a {s|z}ext and its source are both live out, rewrite all
7004// other uses of the source with result of extension.
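// Illustrative example (hypothetical IR): with
//   bb0:  %z = zext i32 %x to i64
//   bb1:  ... uses i32 %x ...
// rewriting the out-of-block use of %x as "trunc i64 %z to i32" leaves only
// %z live across the block boundary, which is free when truncation is free.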
7005Value *Src =I->getOperand(0);
7006if (Src->hasOneUse())
7007returnfalse;
7008
7009// Only do this xform if truncating is free.
7010if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7011returnfalse;
7012
7013// Only safe to perform the optimization if the source is also defined in
7014// this block.
7015if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7016returnfalse;
7017
7018bool DefIsLiveOut =false;
7019for (User *U :I->users()) {
7020Instruction *UI = cast<Instruction>(U);
7021
7022// Figure out which BB this ext is used in.
7023BasicBlock *UserBB = UI->getParent();
7024if (UserBB == DefBB)
7025continue;
7026 DefIsLiveOut =true;
7027break;
7028 }
7029if (!DefIsLiveOut)
7030returnfalse;
7031
7032// Make sure none of the uses are PHI nodes.
7033for (User *U : Src->users()) {
7034Instruction *UI = cast<Instruction>(U);
7035BasicBlock *UserBB = UI->getParent();
7036if (UserBB == DefBB)
7037continue;
7038// Be conservative. We don't want this xform to end up introducing
7039// reloads just before load / store instructions.
7040if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7041returnfalse;
7042 }
7043
7044// InsertedTruncs - Only insert one trunc in each block.
7045DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7046
7047bool MadeChange =false;
7048for (Use &U : Src->uses()) {
7049Instruction *User = cast<Instruction>(U.getUser());
7050
7051// Figure out which BB this ext is used in.
7052BasicBlock *UserBB =User->getParent();
7053if (UserBB == DefBB)
7054continue;
7055
7056// Both src and def are live in this block. Rewrite the use.
7057Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7058
7059if (!InsertedTrunc) {
7060BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7061assert(InsertPt != UserBB->end());
7062 InsertedTrunc =newTruncInst(I, Src->getType(),"");
7063 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7064 InsertedInsts.insert(InsertedTrunc);
7065 }
7066
7067// Replace a use of the {s|z}ext source with a use of the result.
7068U = InsertedTrunc;
7069 ++NumExtUses;
7070 MadeChange =true;
7071 }
7072
7073return MadeChange;
7074}
7075
7076// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7077// just after the load if the target can fold this into one extload instruction,
7078// with the hope of eliminating some of the other later "and" instructions using
7079// the loaded value. "and"s that are made trivially redundant by the insertion
7080// of the new "and" are removed by this function, while others (e.g. those whose
7081// path from the load goes through a phi) are left for isel to potentially
7082// remove.
7083//
7084// For example:
7085//
7086// b0:
7087// x = load i32
7088// ...
7089// b1:
7090// y = and x, 0xff
7091// z = use y
7092//
7093// becomes:
7094//
7095// b0:
7096// x = load i32
7097// x' = and x, 0xff
7098// ...
7099// b1:
7100// z = use x'
7101//
7102// whereas:
7103//
7104// b0:
7105// x1 = load i32
7106// ...
7107// b1:
7108// x2 = load i32
7109// ...
7110// b2:
7111// x = phi x1, x2
7112// y = and x, 0xff
7113//
7114// becomes (after a call to optimizeLoadExt for each load):
7115//
7116// b0:
7117// x1 = load i32
7118// x1' = and x1, 0xff
7119// ...
7120// b1:
7121// x2 = load i32
7122// x2' = and x2, 0xff
7123// ...
7124// b2:
7125// x = phi x1', x2'
7126// y = and x, 0xff
7127bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7128if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7129returnfalse;
7130
7131// Skip loads we've already transformed.
7132if (Load->hasOneUse() &&
7133 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7134returnfalse;
7135
7136// Look at all uses of Load, looking through phis, to determine how many bits
7137// of the loaded value are needed.
7138SmallVector<Instruction *, 8> WorkList;
7139SmallPtrSet<Instruction *, 16> Visited;
7140SmallVector<Instruction *, 8> AndsToMaybeRemove;
7141SmallVector<Instruction *, 8> DropFlags;
7142for (auto *U :Load->users())
7143 WorkList.push_back(cast<Instruction>(U));
7144
7145EVT LoadResultVT = TLI->getValueType(*DL,Load->getType());
7146unsignedBitWidth = LoadResultVT.getSizeInBits();
7147// If the BitWidth is 0, do not try to optimize the type
7148if (BitWidth == 0)
7149returnfalse;
7150
7151APInt DemandBits(BitWidth, 0);
7152APInt WidestAndBits(BitWidth, 0);
7153
7154while (!WorkList.empty()) {
7155Instruction *I = WorkList.pop_back_val();
7156
7157// Break use-def graph loops.
7158if (!Visited.insert(I).second)
7159continue;
7160
7161// For a PHI node, push all of its users.
7162if (auto *Phi = dyn_cast<PHINode>(I)) {
7163for (auto *U :Phi->users())
7164 WorkList.push_back(cast<Instruction>(U));
7165continue;
7166 }
7167
7168switch (I->getOpcode()) {
7169case Instruction::And: {
7170auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7171if (!AndC)
7172returnfalse;
7173APInt AndBits = AndC->getValue();
7174 DemandBits |= AndBits;
7175// Keep track of the widest and mask we see.
7176if (AndBits.ugt(WidestAndBits))
7177 WidestAndBits = AndBits;
7178if (AndBits == WidestAndBits &&I->getOperand(0) == Load)
7179 AndsToMaybeRemove.push_back(I);
7180break;
7181 }
7182
7183case Instruction::Shl: {
7184auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7185if (!ShlC)
7186returnfalse;
7187uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7188 DemandBits.setLowBits(BitWidth - ShiftAmt);
7189 DropFlags.push_back(I);
7190break;
7191 }
7192
7193case Instruction::Trunc: {
7194EVT TruncVT = TLI->getValueType(*DL,I->getType());
7195unsigned TruncBitWidth = TruncVT.getSizeInBits();
7196 DemandBits.setLowBits(TruncBitWidth);
7197 DropFlags.push_back(I);
7198break;
7199 }
7200
7201default:
7202returnfalse;
7203 }
7204 }
7205
7206uint32_t ActiveBits = DemandBits.getActiveBits();
7207// Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7208// target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7209// for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7210// (and (load x) 1) is not matched as a single instruction, rather as a LDR
7211// followed by an AND.
7212// TODO: Look into removing this restriction by fixing backends to either
7213// return false for isLoadExtLegal for i1 or have them select this pattern to
7214// a single instruction.
7215//
7216// Also avoid hoisting if we didn't see any ands with the exact DemandBits
7217// mask, since these are the only ands that will be removed by isel.
7218if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7219 WidestAndBits != DemandBits)
7220returnfalse;
7221
7222LLVMContext &Ctx =Load->getType()->getContext();
7223Type *TruncTy =Type::getIntNTy(Ctx, ActiveBits);
7224EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7225
7226// Reject cases that won't be matched as extloads.
7227if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7228 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7229returnfalse;
7230
7231IRBuilder<> Builder(Load->getNextNonDebugInstruction());
7232auto *NewAnd = cast<Instruction>(
7233 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7234// Mark this instruction as "inserted by CGP", so that other
7235// optimizations don't touch it.
7236 InsertedInsts.insert(NewAnd);
7237
7238// Replace all uses of load with new and (except for the use of load in the
7239// new and itself).
7240replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7241 NewAnd->setOperand(0, Load);
7242
7243// Remove any and instructions that are now redundant.
7244for (auto *And : AndsToMaybeRemove)
7245// Check that the and mask is the same as the one we decided to put on the
7246// new and.
7247if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7248replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7249if (&*CurInstIterator ==And)
7250 CurInstIterator = std::next(And->getIterator());
7251And->eraseFromParent();
7252 ++NumAndUses;
7253 }
7254
7255// NSW flags may no longer hold.
7256for (auto *Inst : DropFlags)
7257 Inst->setHasNoSignedWrap(false);
7258
7259 ++NumAndsAdded;
7260returntrue;
7261}
7262
7263/// Check if V (an operand of a select instruction) is an expensive instruction
7264/// that is only used once.
7265staticboolsinkSelectOperand(constTargetTransformInfo *TTI,Value *V) {
7266auto *I = dyn_cast<Instruction>(V);
7267// If it's safe to speculatively execute, then it should not have side
7268// effects; therefore, it's safe to sink and possibly *not* execute.
7269returnI &&I->hasOneUse() &&isSafeToSpeculativelyExecute(I) &&
7270TTI->isExpensiveToSpeculativelyExecute(I);
7271}
7272
7273/// Returns true if a SelectInst should be turned into an explicit branch.
7274staticboolisFormingBranchFromSelectProfitable(constTargetTransformInfo *TTI,
7275constTargetLowering *TLI,
7276SelectInst *SI) {
7277// If even a predictable select is cheap, then a branch can't be cheaper.
7278if (!TLI->isPredictableSelectExpensive())
7279returnfalse;
7280
7281// FIXME: This should use the same heuristics as IfConversion to determine
7282// whether a select is better represented as a branch.
7283
7284// If metadata tells us that the select condition is obviously predictable,
7285// then we want to replace the select with a branch.
7286uint64_t TrueWeight, FalseWeight;
7287if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7288uint64_t Max = std::max(TrueWeight, FalseWeight);
7289uint64_t Sum = TrueWeight + FalseWeight;
7290if (Sum != 0) {
7291auto Probability =BranchProbability::getBranchProbability(Max, Sum);
7292if (Probability >TTI->getPredictableBranchThreshold())
7293returntrue;
7294 }
7295 }
7296
7297CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7298
7299// If a branch is predictable, an out-of-order CPU can avoid blocking on its
7300// comparison condition. If the compare has more than one use, there's
7301// probably another cmov or setcc around, so it's not worth emitting a branch.
7302if (!Cmp || !Cmp->hasOneUse())
7303returnfalse;
7304
7305// If either operand of the select is expensive and only needed on one side
7306// of the select, we should form a branch.
7307if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7308sinkSelectOperand(TTI, SI->getFalseValue()))
7309returntrue;
7310
7311returnfalse;
7312}
7313
7314/// If \p isTrue is true, return the true value of \p SI, otherwise return
7315/// the false value of \p SI. If the true/false value of \p SI is defined by any
7316/// select instructions in \p Selects, look through the defining select
7317/// instruction until the true/false value is not defined in \p Selects.
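///
/// For example (illustrative only), with
///   %s1 = select i1 %c, i32 %a, i32 %b
///   %s2 = select i1 %c, i32 %s1, i32 %d
/// asking for the true value of %s2 (with %s1 in \p Selects) looks through
/// %s1 and returns %a.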
7318staticValue *
7319getTrueOrFalseValue(SelectInst *SI,bool isTrue,
7320constSmallPtrSet<const Instruction *, 2> &Selects) {
7321Value *V =nullptr;
7322
7323for (SelectInst *DefSI = SI; DefSI !=nullptr && Selects.count(DefSI);
7324 DefSI = dyn_cast<SelectInst>(V)) {
7325assert(DefSI->getCondition() == SI->getCondition() &&
7326"The condition of DefSI does not match with SI");
7327 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7328 }
7329
7330assert(V &&"Failed to get select true/false value");
7331return V;
7332}
7333
7334bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7335assert(Shift->isShift() &&"Expected a shift");
7336
7337// If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7338// general vector shifts, and (3) the shift amount is a select-of-splatted
7339// values, hoist the shifts before the select:
7340// shift Op0, (select Cond, TVal, FVal) -->
7341// select Cond, (shift Op0, TVal), (shift Op0, FVal)
7342//
7343// This is inverting a generic IR transform when we know that the cost of a
7344// general vector shift is more than the cost of 2 shift-by-scalars.
7345// We can't do this effectively in SDAG because we may not be able to
7346// determine if the select operands are splats from within a basic block.
7347Type *Ty = Shift->getType();
7348if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7349returnfalse;
7350Value *Cond, *TVal, *FVal;
7351if (!match(Shift->getOperand(1),
7352m_OneUse(m_Select(m_Value(Cond),m_Value(TVal),m_Value(FVal)))))
7353returnfalse;
7354if (!isSplatValue(TVal) || !isSplatValue(FVal))
7355returnfalse;
7356
7357IRBuilder<> Builder(Shift);
7358BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7359Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7360Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7361Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7362replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7363 Shift->eraseFromParent();
7364returntrue;
7365}
7366
7367bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7368Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7369assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7370"Expected a funnel shift");
7371
7372// If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7373// than general vector shifts, and (3) the shift amount is select-of-splatted
7374// values, hoist the funnel shifts before the select:
7375// fsh Op0, Op1, (select Cond, TVal, FVal) -->
7376// select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7377//
7378// This is inverting a generic IR transform when we know that the cost of a
7379// general vector shift is more than the cost of 2 shift-by-scalars.
7380// We can't do this effectively in SDAG because we may not be able to
7381// determine if the select operands are splats from within a basic block.
7382Type *Ty = Fsh->getType();
7383if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7384returnfalse;
7385Value *Cond, *TVal, *FVal;
7386if (!match(Fsh->getOperand(2),
7387m_OneUse(m_Select(m_Value(Cond),m_Value(TVal),m_Value(FVal)))))
7388returnfalse;
7389if (!isSplatValue(TVal) || !isSplatValue(FVal))
7390returnfalse;
7391
7392IRBuilder<> Builder(Fsh);
7393Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7394Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X,Y, TVal});
7395Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X,Y, FVal});
7396Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7397replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7398 Fsh->eraseFromParent();
7399returntrue;
7400}
7401
7402/// If we have a SelectInst that will likely profit from branch prediction,
7403/// turn it into a branch.
7404bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7405if (DisableSelectToBranch)
7406returnfalse;
7407
7408// If the SelectOptimize pass is enabled, selects have already been optimized.
7409if (!getCGPassBuilderOption().DisableSelectOptimize)
7410returnfalse;
7411
7412// Find all consecutive select instructions that share the same condition.
7413SmallVector<SelectInst *, 2> ASI;
7414 ASI.push_back(SI);
7415for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7416 It !=SI->getParent()->end(); ++It) {
7417SelectInst *I = dyn_cast<SelectInst>(&*It);
7418if (I &&SI->getCondition() ==I->getCondition()) {
7419 ASI.push_back(I);
7420 }else {
7421break;
7422 }
7423 }
7424
7425SelectInst *LastSI = ASI.back();
7426// Increment the current iterator to skip the rest of the select instructions,
7427// because they will either all be lowered to branches or not lowered at all.
7428 CurInstIterator = std::next(LastSI->getIterator());
7429// Examine debug-info attached to the consecutive select instructions. They
7430// won't be individually optimised by optimizeInst, so we need to perform
7431// DbgVariableRecord maintenance here instead.
7432for (SelectInst *SI :ArrayRef(ASI).drop_front())
7433 fixupDbgVariableRecordsOnInst(*SI);
7434
7435bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7436
7437// Can we convert the 'select' to CF ?
7438if (VectorCond ||SI->getMetadata(LLVMContext::MD_unpredictable))
7439returnfalse;
7440
7441TargetLowering::SelectSupportKind SelectKind;
7442if (SI->getType()->isVectorTy())
7443 SelectKind = TargetLowering::ScalarCondVectorVal;
7444else
7445 SelectKind = TargetLowering::ScalarValSelect;
7446
7447if (TLI->isSelectSupported(SelectKind) &&
7448 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) ||
7449llvm::shouldOptimizeForSize(SI->getParent(), PSI,BFI.get())))
7450returnfalse;
7451
7452// The DominatorTree needs to be rebuilt by any consumers after this
7453// transformation. We simply reset here rather than setting the ModifiedDT
7454// flag to avoid restarting the function walk in runOnFunction for each
7455// select optimized.
7456 DT.reset();
7457
7458// Transform a sequence like this:
7459// start:
7460// %cmp = cmp uge i32 %a, %b
7461// %sel = select i1 %cmp, i32 %c, i32 %d
7462//
7463// Into:
7464// start:
7465// %cmp = cmp uge i32 %a, %b
7466// %cmp.frozen = freeze %cmp
7467// br i1 %cmp.frozen, label %select.true, label %select.false
7468// select.true:
7469// br label %select.end
7470// select.false:
7471// br label %select.end
7472// select.end:
7473// %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7474//
7475// %cmp should be frozen, otherwise it may introduce undefined behavior.
7476// In addition, we may sink instructions that produce %c or %d from
7477// the entry block into the destination(s) of the new branch.
7478// If the true or false blocks do not contain a sunken instruction, that
7479// block and its branch may be optimized away. In that case, one side of the
7480// first branch will point directly to select.end, and the corresponding PHI
7481// predecessor block will be the start block.
7482
7483// Collect values that go on the true side and the values that go on the false
7484// side.
7485SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7486for (SelectInst *SI : ASI) {
7487if (Value *V =SI->getTrueValue();sinkSelectOperand(TTI, V))
7488 TrueInstrs.push_back(cast<Instruction>(V));
7489if (Value *V =SI->getFalseValue();sinkSelectOperand(TTI, V))
7490 FalseInstrs.push_back(cast<Instruction>(V));
7491 }
7492
7493// Split the select block, according to how many (if any) values go on each
7494// side.
7495BasicBlock *StartBlock =SI->getParent();
7496BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7497// We should split before any debug-info.
7498 SplitPt.setHeadBit(true);
7499
7500IRBuilder<>IB(SI);
7501auto *CondFr =IB.CreateFreeze(SI->getCondition(),SI->getName() +".frozen");
7502
7503BasicBlock *TrueBlock =nullptr;
7504BasicBlock *FalseBlock =nullptr;
7505BasicBlock *EndBlock =nullptr;
7506BranchInst *TrueBranch =nullptr;
7507BranchInst *FalseBranch =nullptr;
7508if (TrueInstrs.size() == 0) {
7509 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7510 CondFr, SplitPt,false,nullptr,nullptr, LI));
7511 FalseBlock = FalseBranch->getParent();
7512 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7513 }elseif (FalseInstrs.size() == 0) {
7514 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7515 CondFr, SplitPt,false,nullptr,nullptr, LI));
7516 TrueBlock = TrueBranch->getParent();
7517 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7518 }else {
7519Instruction *ThenTerm =nullptr;
7520Instruction *ElseTerm =nullptr;
7521SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7522nullptr,nullptr, LI);
7523 TrueBranch = cast<BranchInst>(ThenTerm);
7524 FalseBranch = cast<BranchInst>(ElseTerm);
7525 TrueBlock = TrueBranch->getParent();
7526 FalseBlock = FalseBranch->getParent();
7527 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7528 }
7529
7530 EndBlock->setName("select.end");
7531if (TrueBlock)
7532 TrueBlock->setName("select.true.sink");
7533if (FalseBlock)
7534 FalseBlock->setName(FalseInstrs.size() == 0 ?"select.false"
7535 :"select.false.sink");
7536
7537if (IsHugeFunc) {
7538if (TrueBlock)
7539 FreshBBs.insert(TrueBlock);
7540if (FalseBlock)
7541 FreshBBs.insert(FalseBlock);
7542 FreshBBs.insert(EndBlock);
7543 }
7544
7545BFI->setBlockFreq(EndBlock,BFI->getBlockFreq(StartBlock));
7546
7547staticconstunsigned MD[] = {
7548 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7549 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7550 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7551
7552// Sink expensive instructions into the conditional blocks to avoid executing
7553// them speculatively.
7554for (Instruction *I : TrueInstrs)
7555I->moveBefore(TrueBranch->getIterator());
7556for (Instruction *I : FalseInstrs)
7557I->moveBefore(FalseBranch->getIterator());
7558
7559// If we did not create a new block for one of the 'true' or 'false' paths
7560// of the condition, it means that side of the branch goes to the end block
7561// directly and the path originates from the start block from the point of
7562// view of the new PHI.
7563if (TrueBlock ==nullptr)
7564 TrueBlock = StartBlock;
7565elseif (FalseBlock ==nullptr)
7566 FalseBlock = StartBlock;
7567
7568SmallPtrSet<const Instruction *, 2> INS;
7569 INS.insert(ASI.begin(), ASI.end());
7570// Use a reverse iterator because a later select may use the value of an
7571// earlier select, and we need to propagate that value through the earlier
7572// select to get the PHI operand.
7573for (SelectInst *SI :llvm::reverse(ASI)) {
7574// The select itself is replaced with a PHI Node.
7575PHINode *PN =PHINode::Create(SI->getType(), 2,"");
7576 PN->insertBefore(EndBlock->begin());
7577 PN->takeName(SI);
7578 PN->addIncoming(getTrueOrFalseValue(SI,true, INS), TrueBlock);
7579 PN->addIncoming(getTrueOrFalseValue(SI,false, INS), FalseBlock);
7580 PN->setDebugLoc(SI->getDebugLoc());
7581
7582replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7583SI->eraseFromParent();
7584 INS.erase(SI);
7585 ++NumSelectsExpanded;
7586 }
7587
7588// Instruct OptimizeBlock to skip to the next block.
7589 CurInstIterator = StartBlock->end();
7590returntrue;
7591}
7592
7593/// Some targets only accept certain types for splat inputs. For example, a VDUP
7594/// in MVE takes a GPR (integer) register, and instructions that incorporate
7595/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7596bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7597// Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7598if (!match(SVI,m_Shuffle(m_InsertElt(m_Undef(),m_Value(),m_ZeroInt()),
7599m_Undef(),m_ZeroMask())))
7600returnfalse;
7601Type *NewType = TLI->shouldConvertSplatType(SVI);
7602if (!NewType)
7603returnfalse;
7604
7605auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7606assert(!NewType->isVectorTy() &&"Expected a scalar type!");
7607assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7608"Expected a type of the same size!");
7609auto *NewVecType =
7610FixedVectorType::get(NewType, SVIVecType->getNumElements());
7611
7612// Create a bitcast (shuffle (insert (bitcast(..))))
7613IRBuilder<> Builder(SVI->getContext());
7614 Builder.SetInsertPoint(SVI);
7615Value *BC1 = Builder.CreateBitCast(
7616 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7617Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7618Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7619
7620replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7621RecursivelyDeleteTriviallyDeadInstructions(
7622 SVI, TLInfo,nullptr,
7623 [&](Value *V) { removeAllAssertingVHReferences(V); });
7624
7625// Also hoist the bitcast up to its operand if they are not in the same
7626// block.
7627if (auto *BCI = dyn_cast<Instruction>(BC1))
7628if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7629if (BCI->getParent() !=Op->getParent() && !isa<PHINode>(Op) &&
7630 !Op->isTerminator() && !Op->isEHPad())
7631 BCI->moveAfter(Op);
7632
7633returntrue;
7634}
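// Illustrative sketch (not part of the original file): for a target whose
// shouldConvertSplatType() returns i32 for a <4 x float> splat, the rewrite
// above turns
//  %ins = insertelement <4 x float> poison, float %f, i64 0
//  %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
// into
//  %bc1 = bitcast float %f to i32
//  %splat.i32 = ...   ; <4 x i32> splat of %bc1, built with CreateVectorSplat
//  %bc2 = bitcast <4 x i32> %splat.i32 to <4 x float>
// so the splatted value can live in a GPR (e.g. feeding an MVE VDUP).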
7635
7636bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7637// If the operands of I can be folded into a target instruction together with
7638// I, duplicate and sink them.
7639SmallVector<Use *, 4> OpsToSink;
7640if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7641returnfalse;
7642
7643// OpsToSink can contain multiple uses in a use chain (e.g.
7644// (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7645// uses must come first, so we process the ops in reverse order so as to not
7646// create invalid IR.
7647BasicBlock *TargetBB =I->getParent();
7648bool Changed =false;
7649SmallVector<Use *, 4> ToReplace;
7650Instruction *InsertPoint =I;
7651DenseMap<const Instruction *, unsigned long> InstOrdering;
7652unsignedlong InstNumber = 0;
7653for (constauto &I : *TargetBB)
7654 InstOrdering[&I] = InstNumber++;
7655
7656for (Use *U :reverse(OpsToSink)) {
7657auto *UI = cast<Instruction>(U->get());
7658if (isa<PHINode>(UI))
7659continue;
7660if (UI->getParent() == TargetBB) {
7661if (InstOrdering[UI] < InstOrdering[InsertPoint])
7662 InsertPoint = UI;
7663continue;
7664 }
7665 ToReplace.push_back(U);
7666 }
7667
7668SetVector<Instruction *> MaybeDead;
7669DenseMap<Instruction *, Instruction *> NewInstructions;
7670for (Use *U : ToReplace) {
7671auto *UI = cast<Instruction>(U->get());
7672Instruction *NI = UI->clone();
7673
7674if (IsHugeFunc) {
7675// Since we clone an instruction here, its operands' defs may now sink into
7676// this BB, so we add the operands' defining BBs to FreshBBs for later optimization.
7677for (Value *Op : NI->operands())
7678if (auto *OpDef = dyn_cast<Instruction>(Op))
7679 FreshBBs.insert(OpDef->getParent());
7680 }
7681
7682 NewInstructions[UI] = NI;
7683 MaybeDead.insert(UI);
7684LLVM_DEBUG(dbgs() <<"Sinking " << *UI <<" to user " << *I <<"\n");
7685 NI->insertBefore(InsertPoint->getIterator());
7686 InsertPoint = NI;
7687 InsertedInsts.insert(NI);
7688
7689// Update the use for the new instruction, making sure that we update the
7690// sunk instruction uses, if it is part of a chain that has already been
7691// sunk.
7692Instruction *OldI = cast<Instruction>(U->getUser());
7693if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7694 It->second->setOperand(U->getOperandNo(), NI);
7695else
7696U->set(NI);
7697 Changed =true;
7698 }
7699
7700// Remove instructions that are dead after sinking.
7701for (auto *I : MaybeDead) {
7702if (!I->hasNUsesOrMore(1)) {
7703LLVM_DEBUG(dbgs() <<"Removing dead instruction: " << *I <<"\n");
7704I->eraseFromParent();
7705 }
7706 }
7707
7708return Changed;
7709}
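// Illustrative example (hypothetical IR, not from the original source): given
//  entry:
//   %s = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> zeroinitializer
//   %z = zext <4 x i16> %s to <4 x i32>
//   br label %loop
//  loop:
//   %m = mul <4 x i32> %acc, %z
// a target that can fold the splat and extend into the multiply reports both
// uses via isProfitableToSinkOperands(); the chain is then cloned into %loop
// (the dominating %s clone first, then the %z clone), the use in %m is
// rewired to the clones, and the originals are erased once they become dead.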
7710
7711bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7712Value *Cond =SI->getCondition();
7713Type *OldType =Cond->getType();
7714LLVMContext &Context =Cond->getContext();
7715EVT OldVT = TLI->getValueType(*DL, OldType);
7716MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7717unsigned RegWidth = RegType.getSizeInBits();
7718
7719if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7720returnfalse;
7721
7722// If the register width is greater than the type width, expand the condition
7723// of the switch instruction and each case constant to the width of the
7724// register. By widening the type of the switch condition, subsequent
7725// comparisons (for case comparisons) will not need to be extended to the
7726// preferred register width, so we will potentially eliminate N-1 extends,
7727// where N is the number of cases in the switch.
7728auto *NewType =Type::getIntNTy(Context, RegWidth);
7729
7730// Extend the switch condition and case constants using the target preferred
7731// extend unless the switch condition is a function argument with an extend
7732// attribute. In that case, we can avoid an unnecessary mask/extension by
7733// matching the argument extension instead.
7734Instruction::CastOps ExtType = Instruction::ZExt;
7735// Some targets prefer SExt over ZExt.
7736if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7737 ExtType = Instruction::SExt;
7738
7739if (auto *Arg = dyn_cast<Argument>(Cond)) {
7740if (Arg->hasSExtAttr())
7741 ExtType = Instruction::SExt;
7742if (Arg->hasZExtAttr())
7743 ExtType = Instruction::ZExt;
7744 }
7745
7746auto *ExtInst =CastInst::Create(ExtType,Cond, NewType);
7747 ExtInst->insertBefore(SI->getIterator());
7748 ExtInst->setDebugLoc(SI->getDebugLoc());
7749SI->setCondition(ExtInst);
7750for (auto Case :SI->cases()) {
7751constAPInt &NarrowConst = Case.getCaseValue()->getValue();
7752APInt WideConst = (ExtType == Instruction::ZExt)
7753 ? NarrowConst.zext(RegWidth)
7754 : NarrowConst.sext(RegWidth);
7755 Case.setValue(ConstantInt::get(Context, WideConst));
7756 }
7757
7758returntrue;
7759}
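// Illustrative example (hypothetical, not from the original source): with a
// preferred switch-condition width of 32 bits and an i8 condition,
//  switch i8 %c, label %def [ i8 1, label %a
//                             i8 200, label %b ]
// becomes (assuming zero extension is chosen)
//  %c.wide = zext i8 %c to i32
//  switch i32 %c.wide, label %def [ i32 1, label %a
//                                   i32 200, label %b ]
// so each case comparison happens directly at register width instead of
// extending %c once per case during selection.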
7760
7761bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7762// The SCCP optimization tends to produce code like this:
7763// switch(x) { case 42: phi(42, ...) }
7764// Materializing the constant for the phi argument requires instructions, so we
7765// change the code to:
7766// switch(x) { case 42: phi(x, ...) }
7767
7768Value *Condition =SI->getCondition();
7769// Avoid endless loop in degenerate case.
7770if (isa<ConstantInt>(*Condition))
7771returnfalse;
7772
7773bool Changed =false;
7774BasicBlock *SwitchBB =SI->getParent();
7775Type *ConditionType = Condition->getType();
7776
7777for (constSwitchInst::CaseHandle &Case :SI->cases()) {
7778ConstantInt *CaseValue = Case.getCaseValue();
7779BasicBlock *CaseBB = Case.getCaseSuccessor();
7780// Set to true if we previously checked that `CaseBB` is only reached by
7781// a single case from this switch.
7782bool CheckedForSinglePred =false;
7783for (PHINode &PHI : CaseBB->phis()) {
7784Type *PHIType =PHI.getType();
7785// If ZExt is free then we can also catch patterns like this:
7786// switch((i32)x) { case 42: phi((i64)42, ...); }
7787// and replace `(i64)42` with `zext i32 %x to i64`.
7788bool TryZExt =
7789 PHIType->isIntegerTy() &&
7790 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7791 TLI->isZExtFree(ConditionType, PHIType);
7792if (PHIType == ConditionType || TryZExt) {
7793// Set to true to skip this case because of multiple preds.
7794bool SkipCase =false;
7795Value *Replacement =nullptr;
7796for (unsignedI = 0, E =PHI.getNumIncomingValues();I != E;I++) {
7797Value *PHIValue =PHI.getIncomingValue(I);
7798if (PHIValue != CaseValue) {
7799if (!TryZExt)
7800continue;
7801ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7802if (!PHIValueInt ||
7803 PHIValueInt->getValue() !=
7804 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7805continue;
7806 }
7807if (PHI.getIncomingBlock(I) != SwitchBB)
7808continue;
7809// We cannot optimize if there are multiple case labels jumping to
7810// this block. This check may get expensive when there are many
7811// case labels so we test for it last.
7812if (!CheckedForSinglePred) {
7813 CheckedForSinglePred =true;
7814if (SI->findCaseDest(CaseBB) ==nullptr) {
7815 SkipCase =true;
7816break;
7817 }
7818 }
7819
7820if (Replacement ==nullptr) {
7821if (PHIValue == CaseValue) {
7822 Replacement = Condition;
7823 }else {
7824IRBuilder<> Builder(SI);
7825 Replacement = Builder.CreateZExt(Condition, PHIType);
7826 }
7827 }
7828PHI.setIncomingValue(I, Replacement);
7829 Changed =true;
7830 }
7831if (SkipCase)
7832break;
7833 }
7834 }
7835 }
7836return Changed;
7837}
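// Illustrative example of the TryZExt case described above (hypothetical IR):
// when zext from i32 to i64 is free,
//  switch i32 %x, label %def [ i32 42, label %case42 ]
//  case42:
//   %p = phi i64 [ 42, %switchbb ], ...
// is rewritten so the PHI reuses the (extended) condition instead of
// materializing the constant:
//   %x.ext = zext i32 %x to i64
//   %p = phi i64 [ %x.ext, %switchbb ], ...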
7838
7839bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7840bool Changed = optimizeSwitchType(SI);
7841 Changed |= optimizeSwitchPhiConstants(SI);
7842return Changed;
7843}
7844
7845namespace{
7846
7847/// Helper class to promote a scalar operation to a vector one.
7848/// This class is used to move an extractelement transition downward.
7849/// E.g.,
7850/// a = vector_op <2 x i32>
7851/// b = extractelement <2 x i32> a, i32 0
7852/// c = scalar_op b
7853/// store c
7854///
7855/// =>
7856/// a = vector_op <2 x i32>
7857/// c = vector_op a (equivalent to scalar_op on the related lane)
7858/// * d = extractelement <2 x i32> c, i32 0
7859/// * store d
7860/// Assuming both extractelement and store can be combined, we get rid of the
7861/// transition.
7862classVectorPromoteHelper {
7863 /// DataLayout associated with the current module.
7864constDataLayout &DL;
7865
7866 /// Used to perform some checks on the legality of vector operations.
7867constTargetLowering &TLI;
7868
7869 /// Used to estimate the cost of the promoted chain.
7870constTargetTransformInfo &TTI;
7871
7872 /// The transition being moved downwards.
7873Instruction *Transition;
7874
7875 /// The sequence of instructions to be promoted.
7876SmallVector<Instruction *, 4> InstsToBePromoted;
7877
7878 /// Cost of combining a store and an extract.
7879unsigned StoreExtractCombineCost;
7880
7881 /// Instruction that will be combined with the transition.
7882Instruction *CombineInst =nullptr;
7883
7884 /// The instruction that represents the current end of the transition.
7885 /// Since we are faking the promotion until we reach the end of the chain
7886 /// of computation, we need a way to get the current end of the transition.
7887Instruction *getEndOfTransition() const{
7888if (InstsToBePromoted.empty())
7889return Transition;
7890return InstsToBePromoted.back();
7891 }
7892
7893 /// Return the index of the original value in the transition.
7894 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7895 /// c, is at index 0.
7896unsigned getTransitionOriginalValueIdx() const{
7897assert(isa<ExtractElementInst>(Transition) &&
7898"Other kind of transitions are not supported yet");
7899return 0;
7900 }
7901
7902 /// Return the index of the index in the transition.
7903 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7904 /// is at index 1.
7905unsigned getTransitionIdx() const{
7906assert(isa<ExtractElementInst>(Transition) &&
7907"Other kind of transitions are not supported yet");
7908return 1;
7909 }
7910
7911 /// Get the type of the transition.
7912 /// This is the type of the original value.
7913 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7914 /// transition is <2 x i32>.
7915Type *getTransitionType() const{
7916return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7917 }
7918
7919 /// Promote \p ToBePromoted by moving \p Def downward through it.
7920 /// I.e., we have the following sequence:
7921 /// Def = Transition <ty1> a to <ty2>
7922 /// b = ToBePromoted <ty2> Def, ...
7923 /// =>
7924 /// b = ToBePromoted <ty1> a, ...
7925 /// Def = Transition <ty1> ToBePromoted to <ty2>
7926void promoteImpl(Instruction *ToBePromoted);
7927
7928 /// Check whether or not it is profitable to promote all the
7929 /// instructions enqueued to be promoted.
7930bool isProfitableToPromote() {
7931Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7932unsignedIndex = isa<ConstantInt>(ValIdx)
7933 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7934 : -1;
7935Type *PromotedType = getTransitionType();
7936
7937StoreInst *ST = cast<StoreInst>(CombineInst);
7938unsigned AS =ST->getPointerAddressSpace();
7939// Check if this store is supported.
7940if (!TLI.allowsMisalignedMemoryAccesses(
7941 TLI.getValueType(DL,ST->getValueOperand()->getType()), AS,
7942ST->getAlign())) {
7943// If this is not supported, there is no way we can combine
7944// the extract with the store.
7945returnfalse;
7946 }
7947
7948// The scalar chain of computation has to pay for the transition
7949// scalar to vector.
7950// The vector chain has to account for the combining cost.
7951enumTargetTransformInfo::TargetCostKindCostKind =
7952TargetTransformInfo::TCK_RecipThroughput;
7953InstructionCost ScalarCost =
7954TTI.getVectorInstrCost(*Transition, PromotedType,CostKind, Index);
7955InstructionCost VectorCost = StoreExtractCombineCost;
7956for (constauto &Inst : InstsToBePromoted) {
7957// Compute the cost.
7958// By construction, all instructions being promoted are arithmetic ones.
7959// Moreover, one argument is a constant that can be viewed as a splat
7960// constant.
7961Value *Arg0 = Inst->getOperand(0);
7962bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7963 isa<ConstantFP>(Arg0);
7964TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7965if (IsArg0Constant)
7966 Arg0Info.Kind =TargetTransformInfo::OK_UniformConstantValue;
7967else
7968 Arg1Info.Kind =TargetTransformInfo::OK_UniformConstantValue;
7969
7970 ScalarCost +=TTI.getArithmeticInstrCost(
7971 Inst->getOpcode(), Inst->getType(),CostKind, Arg0Info, Arg1Info);
7972 VectorCost +=TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7973CostKind, Arg0Info, Arg1Info);
7974 }
7975LLVM_DEBUG(
7976dbgs() <<"Estimated cost of computation to be promoted:\nScalar: "
7977 << ScalarCost <<"\nVector: " << VectorCost <<'\n');
7978return ScalarCost > VectorCost;
7979 }
7980
7981 /// Generate a constant vector with \p Val with the same
7982 /// number of elements as the transition.
7983 /// \p UseSplat defines whether or not \p Val should be replicated
7984 /// across the whole vector.
7985 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7986 /// otherwise we generate a vector with as many poison elements as possible:
7987 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
7988 /// used at the index of the extract.
7989Value *getConstantVector(Constant *Val,bool UseSplat) const{
7990unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7991if (!UseSplat) {
7992// If we cannot determine where the constant must be, we have to
7993// use a splat constant.
7994Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7995if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7996 ExtractIdx = CstVal->getSExtValue();
7997else
7998 UseSplat =true;
7999 }
8000
8001ElementCountEC = cast<VectorType>(getTransitionType())->getElementCount();
8002if (UseSplat)
8003returnConstantVector::getSplat(EC, Val);
8004
8005if (!EC.isScalable()) {
8006SmallVector<Constant *, 4> ConstVec;
8007PoisonValue *PoisonVal =PoisonValue::get(Val->getType());
8008for (unsignedIdx = 0;Idx !=EC.getKnownMinValue(); ++Idx) {
8009if (Idx == ExtractIdx)
8010 ConstVec.push_back(Val);
8011else
8012 ConstVec.push_back(PoisonVal);
8013 }
8014returnConstantVector::get(ConstVec);
8015 }else
8016llvm_unreachable(
8017"Generate scalable vector for non-splat is unimplemented");
8018 }
8019
8020 /// Check if promoting the operand at \p OperandIdx in \p Use to a vector
8021 /// type can trigger undefined behavior.
8022staticbool canCauseUndefinedBehavior(constInstruction *Use,
8023unsigned OperandIdx) {
8024// It is not safe to introduce undef when the operand is on
8025// the right hand side of a division-like instruction.
8026if (OperandIdx != 1)
8027returnfalse;
8028switch (Use->getOpcode()) {
8029default:
8030returnfalse;
8031case Instruction::SDiv:
8032case Instruction::UDiv:
8033case Instruction::SRem:
8034case Instruction::URem:
8035returntrue;
8036case Instruction::FDiv:
8037case Instruction::FRem:
8038return !Use->hasNoNaNs();
8039 }
8040llvm_unreachable(nullptr);
8041 }
8042
8043public:
8044 VectorPromoteHelper(constDataLayout &DL,constTargetLowering &TLI,
8045constTargetTransformInfo &TTI,Instruction *Transition,
8046unsigned CombineCost)
8047 :DL(DL), TLI(TLI),TTI(TTI), Transition(Transition),
8048 StoreExtractCombineCost(CombineCost) {
8049assert(Transition &&"Do not know how to promote null");
8050 }
8051
8052 /// Check if we can promote \p ToBePromoted to \p Type.
8053bool canPromote(constInstruction *ToBePromoted) const{
8054// We could support CastInst too.
8055return isa<BinaryOperator>(ToBePromoted);
8056 }
8057
8058 /// Check if it is profitable to promote \p ToBePromoted
8059 /// by moving the transition downward through it.
8060bool shouldPromote(constInstruction *ToBePromoted) const{
8061// Promote only if all the operands can be statically expanded.
8062// Indeed, we do not want to introduce any new kind of transitions.
8063for (constUse &U : ToBePromoted->operands()) {
8064constValue *Val =U.get();
8065if (Val == getEndOfTransition()) {
8066// If the use is a division and the transition is on the rhs,
8067// we cannot promote the operation, otherwise we may create a
8068// division by zero.
8069if (canCauseUndefinedBehavior(ToBePromoted,U.getOperandNo()))
8070returnfalse;
8071continue;
8072 }
8073if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8074 !isa<ConstantFP>(Val))
8075returnfalse;
8076 }
8077// Check that the resulting operation is legal.
8078int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8079if (!ISDOpcode)
8080returnfalse;
8081returnStressStoreExtract ||
8082 TLI.isOperationLegalOrCustom(
8083 ISDOpcode, TLI.getValueType(DL, getTransitionType(),true));
8084 }
8085
8086 /// Check whether or not \p Use can be combined
8087 /// with the transition.
8088 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8089boolcanCombine(constInstruction *Use) {return isa<StoreInst>(Use); }
8090
8091 /// Record \p ToBePromoted as part of the chain to be promoted.
8092void enqueueForPromotion(Instruction *ToBePromoted) {
8093 InstsToBePromoted.push_back(ToBePromoted);
8094 }
8095
8096 /// Set the instruction that will be combined with the transition.
8097void recordCombineInstruction(Instruction *ToBeCombined) {
8098assert(canCombine(ToBeCombined) &&"Unsupported instruction to combine");
8099 CombineInst = ToBeCombined;
8100 }
8101
8102 /// Promote all the instructions enqueued for promotion if it
8103 /// is profitable.
8104 /// \return True if the promotion happened, false otherwise.
8105bool promote() {
8106// Check if there is something to promote.
8107// Right now, if we do not have anything to combine with,
8108// we assume the promotion is not profitable.
8109if (InstsToBePromoted.empty() || !CombineInst)
8110returnfalse;
8111
8112// Check cost.
8113if (!StressStoreExtract && !isProfitableToPromote())
8114returnfalse;
8115
8116// Promote.
8117for (auto &ToBePromoted : InstsToBePromoted)
8118 promoteImpl(ToBePromoted);
8119 InstsToBePromoted.clear();
8120returntrue;
8121 }
8122};
8123
8124}// end anonymous namespace
8125
8126void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8127// At this point, we know that all the operands of ToBePromoted but Def
8128// can be statically promoted.
8129// For Def, we need to use its parameter in ToBePromoted:
8130// b = ToBePromoted ty1 a
8131// Def = Transition ty1 b to ty2
8132// Move the transition down.
8133// 1. Replace all uses of the promoted operation by the transition.
8134// = ... b => = ... Def.
8135assert(ToBePromoted->getType() == Transition->getType() &&
8136"The type of the result of the transition does not match "
8137"the final type");
8138 ToBePromoted->replaceAllUsesWith(Transition);
8139// 2. Update the type of the uses.
8140// b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8141Type *TransitionTy = getTransitionType();
8142 ToBePromoted->mutateType(TransitionTy);
8143// 3. Update all the operands of the promoted operation with promoted
8144// operands.
8145// b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8146for (Use &U : ToBePromoted->operands()) {
8147Value *Val =U.get();
8148Value *NewVal =nullptr;
8149if (Val == Transition)
8150 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8151elseif (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8152 isa<ConstantFP>(Val)) {
8153// Use a splat constant if it is not safe to use undef.
8154 NewVal =getConstantVector(
8155 cast<Constant>(Val),
8156 isa<UndefValue>(Val) ||
8157 canCauseUndefinedBehavior(ToBePromoted,U.getOperandNo()));
8158 }else
8159llvm_unreachable("Did you modified shouldPromote and forgot to update "
8160"this?");
8161 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8162 }
8163 Transition->moveAfter(ToBePromoted);
8164 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8165}
8166
8167/// Some targets can do store(extractelement) with one instruction.
8168/// Try to push the extractelement towards the stores when the target
8169/// has this feature and this is profitable.
8170bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8171unsigned CombineCost = std::numeric_limits<unsigned>::max();
8172if (DisableStoreExtract ||
8173 (!StressStoreExtract &&
8174 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8175 Inst->getOperand(1), CombineCost)))
8176returnfalse;
8177
8178// At this point we know that Inst is a vector to scalar transition.
8179// Try to move it down the def-use chain, until:
8180// - We can combine the transition with its single use
8181// => we got rid of the transition.
8182// - We escape the current basic block
8183// => we would need to check that we are moving it at a cheaper place and
8184// we do not do that for now.
8185BasicBlock *Parent = Inst->getParent();
8186LLVM_DEBUG(dbgs() <<"Found an interesting transition: " << *Inst <<'\n');
8187 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8188// If the transition has more than one use, assume this is not going to be
8189// beneficial.
8190while (Inst->hasOneUse()) {
8191Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8192LLVM_DEBUG(dbgs() <<"Use: " << *ToBePromoted <<'\n');
8193
8194if (ToBePromoted->getParent() != Parent) {
8195LLVM_DEBUG(dbgs() <<"Instruction to promote is in a different block ("
8196 << ToBePromoted->getParent()->getName()
8197 <<") than the transition (" << Parent->getName()
8198 <<").\n");
8199returnfalse;
8200 }
8201
8202if (VPH.canCombine(ToBePromoted)) {
8203LLVM_DEBUG(dbgs() <<"Assume " << *Inst <<'\n'
8204 <<"will be combined with: " << *ToBePromoted <<'\n');
8205 VPH.recordCombineInstruction(ToBePromoted);
8206bool Changed = VPH.promote();
8207 NumStoreExtractExposed += Changed;
8208return Changed;
8209 }
8210
8211LLVM_DEBUG(dbgs() <<"Try promoting.\n");
8212if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8213returnfalse;
8214
8215LLVM_DEBUG(dbgs() <<"Promoting is possible... Enqueue for promotion!\n");
8216
8217 VPH.enqueueForPromotion(ToBePromoted);
8218 Inst = ToBePromoted;
8219 }
8220returnfalse;
8221}
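// Illustrative end-to-end example (hypothetical IR, not from the original
// source) of the promotion performed by VectorPromoteHelper:
//  %a = add <2 x i32> %v, %w
//  %b = extractelement <2 x i32> %a, i32 0
//  %c = shl i32 %b, 3
//  store i32 %c, ptr %p
// becomes, when store(extractelement) is cheap on the target,
//  %a = add <2 x i32> %v, %w
//  %c.vec = shl <2 x i32> %a, <i32 3, i32 poison>
//  %d = extractelement <2 x i32> %c.vec, i32 0
//  store i32 %d, ptr %p
// so the extract can be folded into the store.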
8222
8223/// For the instruction sequence of store below, F and I values
8224/// are bundled together as an i64 value before being stored into memory.
8225/// Sometimes it is more efficient to generate separate stores for F and I,
8226/// which can remove the bitwise instructions or sink them to colder places.
8227///
8228/// (store (or (zext (bitcast F to i32) to i64),
8229/// (shl (zext I to i64), 32)), addr) -->
8230/// (store F, addr) and (store I, addr+4)
8231///
8232/// Similarly, splitting for other merged store can also be beneficial, like:
8233/// For pair of {i32, i32}, i64 store --> two i32 stores.
8234/// For pair of {i32, i16}, i64 store --> two i32 stores.
8235/// For pair of {i16, i16}, i32 store --> two i16 stores.
8236/// For pair of {i16, i8}, i32 store --> two i16 stores.
8237/// For pair of {i8, i8}, i16 store --> two i8 stores.
8238///
8239/// We allow each target to determine specifically which kind of splitting is
8240/// supported.
8241///
8242/// The store patterns are commonly seen from the simple code snippet below
8243/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8244/// void goo(const std::pair<int, float> &);
8245/// hoo() {
8246/// ...
8247/// goo(std::make_pair(tmp, ftmp));
8248/// ...
8249/// }
8250///
8251/// Although we already have similar splitting in DAG Combine, we duplicate
8252/// it in CodeGenPrepare to catch the case in which the pattern spans
8253/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8254/// during code expansion.
8255staticboolsplitMergedValStore(StoreInst &SI,constDataLayout &DL,
8256constTargetLowering &TLI) {
8257// Handle simple but common cases only.
8258Type *StoreType = SI.getValueOperand()->getType();
8259
8260// The code below assumes shifting a value by <number of bits>,
8261// whereas scalable vectors would have to be shifted by
8262// <2log(vscale) + number of bits> in order to store the
8263// low/high parts. Bailing out for now.
8264if (StoreType->isScalableTy())
8265returnfalse;
8266
8267if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8268DL.getTypeSizeInBits(StoreType) == 0)
8269returnfalse;
8270
8271unsigned HalfValBitSize =DL.getTypeSizeInBits(StoreType) / 2;
8272Type *SplitStoreType =Type::getIntNTy(SI.getContext(), HalfValBitSize);
8273if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8274returnfalse;
8275
8276// Don't split the store if it is volatile.
8277if (SI.isVolatile())
8278returnfalse;
8279
8280// Match the following patterns:
8281// (store (or (zext LValue to i64),
8282// (shl (zext HValue to i64), 32)), HalfValBitSize)
8283// or
8284// (store (or (shl (zext HValue to i64), 32),
8285// (zext LValue to i64)), HalfValBitSize)
8286// Expect both operands of OR and the first operand of SHL to have only
8287// one use.
8288Value *LValue, *HValue;
8289if (!match(SI.getValueOperand(),
8290m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8291m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8292m_SpecificInt(HalfValBitSize))))))
8293returnfalse;
8294
8295// Check that LValue and HValue are integers no wider than HalfValBitSize bits.
8296if (!LValue->getType()->isIntegerTy() ||
8297DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8298 !HValue->getType()->isIntegerTy() ||
8299DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8300returnfalse;
8301
8302// If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8303// as the input of target query.
8304auto *LBC = dyn_cast<BitCastInst>(LValue);
8305auto *HBC = dyn_cast<BitCastInst>(HValue);
8306EVT LowTy = LBC ?EVT::getEVT(LBC->getOperand(0)->getType())
8307 :EVT::getEVT(LValue->getType());
8308EVT HighTy = HBC ?EVT::getEVT(HBC->getOperand(0)->getType())
8309 :EVT::getEVT(HValue->getType());
8310if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8311returnfalse;
8312
8313// Start to split store.
8314IRBuilder<> Builder(SI.getContext());
8315 Builder.SetInsertPoint(&SI);
8316
8317// If LValue/HValue is a bitcast in another BB, create a new one in the current
8318// BB so it may be merged with the split stores by the DAG combiner.
8319if (LBC && LBC->getParent() != SI.getParent())
8320LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8321if (HBC && HBC->getParent() != SI.getParent())
8322 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8323
8324bool IsLE = SI.getDataLayout().isLittleEndian();
8325auto CreateSplitStore = [&](Value *V,boolUpper) {
8326 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8327Value *Addr = SI.getPointerOperand();
8328Align Alignment = SI.getAlign();
8329constbool IsOffsetStore = (IsLE &&Upper) || (!IsLE && !Upper);
8330if (IsOffsetStore) {
8331Addr = Builder.CreateGEP(
8332 SplitStoreType,Addr,
8333 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8334
8335// When splitting the store in half, naturally one half will retain the
8336// alignment of the original wider store, regardless of whether it was
8337// over-aligned or not, while the other will require adjustment.
8338 Alignment =commonAlignment(Alignment, HalfValBitSize / 8);
8339 }
8340 Builder.CreateAlignedStore(V,Addr, Alignment);
8341 };
8342
8343 CreateSplitStore(LValue,false);
8344 CreateSplitStore(HValue,true);
8345
8346// Delete the old store.
8347 SI.eraseFromParent();
8348returntrue;
8349}
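// Illustrative example (hypothetical IR, not from the original source) on a
// little-endian target with an {i32, i32} pair merged into an i64 store:
//  %l = zext i32 %lo to i64
//  %h = zext i32 %hi to i64
//  %hs = shl i64 %h, 32
//  %v = or i64 %l, %hs
//  store i64 %v, ptr %p
// is split into
//  store i32 %lo, ptr %p
//  %p.hi = getelementptr i32, ptr %p, i32 1
//  store i32 %hi, ptr %p.hi
// where the offset store's alignment is clamped via commonAlignment(align, 4).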
8350
8351// Return true if the GEP has two operands, the first operand is of a sequential
8352// type, and the second operand is a constant.
8353staticboolGEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8354gep_type_iteratorI =gep_type_begin(*GEP);
8355returnGEP->getNumOperands() == 2 &&I.isSequential() &&
8356 isa<ConstantInt>(GEP->getOperand(1));
8357}
8358
8359// Try unmerging GEPs to reduce liveness interference (register pressure) across
8360// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8361// reducing liveness interference across those edges benefits global register
8362// allocation. Currently handles only certain cases.
8363//
8364// For example, unmerge %GEPI and %UGEPI as below.
8365//
8366// ---------- BEFORE ----------
8367// SrcBlock:
8368// ...
8369// %GEPIOp = ...
8370// ...
8371// %GEPI = gep %GEPIOp, Idx
8372// ...
8373// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8374// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8375// (* %GEPIOp is alive on the indirectbr edges only because it's used by
8376// %UGEPI)
8377//
8378// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8379// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8380// ...
8381//
8382// DstBi:
8383// ...
8384// %UGEPI = gep %GEPIOp, UIdx
8385// ...
8386// ---------------------------
8387//
8388// ---------- AFTER ----------
8389// SrcBlock:
8390// ... (same as above)
8391// (* %GEPI is still alive on the indirectbr edges)
8392// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8393// unmerging)
8394// ...
8395//
8396// DstBi:
8397// ...
8398// %UGEPI = gep %GEPI, (UIdx-Idx)
8399// ...
8400// ---------------------------
8401//
8402// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8403// no longer alive on them.
8404//
8405// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8406// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8407// not to disable further simplifications and optimizations as a result of GEP
8408// merging.
8409//
8410// Note this unmerging may increase the length of the data flow critical path
8411// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8412// between the register pressure and the length of data-flow critical
8413// path. Restricting this to the uncommon IndirectBr case would minimize the
8414// impact of potentially longer critical path, if any, and the impact on compile
8415// time.
8416staticbooltryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8417constTargetTransformInfo *TTI) {
8418BasicBlock *SrcBlock = GEPI->getParent();
8419// Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8420// (non-IndirectBr) cases exit early here.
8421if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8422returnfalse;
8423// Check that GEPI is a simple gep with a single constant index.
8424if (!GEPSequentialConstIndexed(GEPI))
8425returnfalse;
8426ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8427// Check that GEPI is a cheap one.
8428if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8429TargetTransformInfo::TCK_SizeAndLatency) >
8430TargetTransformInfo::TCC_Basic)
8431returnfalse;
8432Value *GEPIOp = GEPI->getOperand(0);
8433// Check that GEPIOp is an instruction that's also defined in SrcBlock.
8434if (!isa<Instruction>(GEPIOp))
8435returnfalse;
8436auto *GEPIOpI = cast<Instruction>(GEPIOp);
8437if (GEPIOpI->getParent() != SrcBlock)
8438returnfalse;
8439// Check that GEPI is used outside the block, meaning it's alive on the
8440// IndirectBr edge(s).
8441if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8442 if (auto *I = dyn_cast<Instruction>(Usr)) {
8443 if (I->getParent() != SrcBlock) {
8444 return true;
8445 }
8446 }
8447returnfalse;
8448 }))
8449returnfalse;
8450// The second elements of the GEP chains to be unmerged.
8451 std::vector<GetElementPtrInst *> UGEPIs;
8452// Check each user of GEPIOp to see if unmerging would make GEPIOp no longer alive
8453// on IndirectBr edges.
8454for (User *Usr : GEPIOp->users()) {
8455if (Usr == GEPI)
8456continue;
8457// Check if Usr is an Instruction. If not, give up.
8458if (!isa<Instruction>(Usr))
8459returnfalse;
8460auto *UI = cast<Instruction>(Usr);
8461// If Usr is in the same block as GEPIOp, that is fine; skip it.
8462if (UI->getParent() == SrcBlock)
8463continue;
8464// Check if Usr is a GEP. If not, give up.
8465if (!isa<GetElementPtrInst>(Usr))
8466returnfalse;
8467auto *UGEPI = cast<GetElementPtrInst>(Usr);
8468// Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8469// the pointer operand to it. If so, record it in the vector. If not, give
8470// up.
8471if (!GEPSequentialConstIndexed(UGEPI))
8472returnfalse;
8473if (UGEPI->getOperand(0) != GEPIOp)
8474returnfalse;
8475if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8476returnfalse;
8477if (GEPIIdx->getType() !=
8478 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8479returnfalse;
8480ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8481if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8482TargetTransformInfo::TCK_SizeAndLatency) >
8483TargetTransformInfo::TCC_Basic)
8484returnfalse;
8485 UGEPIs.push_back(UGEPI);
8486 }
8487if (UGEPIs.size() == 0)
8488returnfalse;
8489// Check the materializing cost of (Uidx-Idx).
8490for (GetElementPtrInst *UGEPI : UGEPIs) {
8491ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8492APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8493InstructionCost ImmCost =TTI->getIntImmCost(
8494 NewIdx, GEPIIdx->getType(),TargetTransformInfo::TCK_SizeAndLatency);
8495if (ImmCost >TargetTransformInfo::TCC_Basic)
8496returnfalse;
8497 }
8498// Now unmerge between GEPI and UGEPIs.
8499for (GetElementPtrInst *UGEPI : UGEPIs) {
8500 UGEPI->setOperand(0, GEPI);
8501ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8502Constant *NewUGEPIIdx = ConstantInt::get(
8503 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8504 UGEPI->setOperand(1, NewUGEPIIdx);
8505// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8506// inbounds to avoid UB.
8507if (!GEPI->isInBounds()) {
8508 UGEPI->setIsInBounds(false);
8509 }
8510 }
8511// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8512// alive on IndirectBr edges).
8513assert(llvm::none_of(GEPIOp->users(),
8514 [&](User *Usr) {
8515 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8516 }) &&
8517"GEPIOp is used outside SrcBlock");
8518returntrue;
8519}
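// Illustrative numeric example (hypothetical, not from the original source):
// with %GEPI = gep i32, ptr %GEPIOp, i64 4 in SrcBlock and
// %UGEPI = gep i32, ptr %GEPIOp, i64 20 in an indirectbr destination, the
// rewrite produces %UGEPI = gep i32, ptr %GEPI, i64 16, so only %GEPI (not
// %GEPIOp) stays live across the indirectbr edges. The unmerge is only done
// when the original and adjusted indices are all cheap immediates
// (<= TCC_Basic) for the target.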
8520
8521staticbooloptimizeBranch(BranchInst *Branch,constTargetLowering &TLI,
8522SmallSet<BasicBlock *, 32> &FreshBBs,
8523bool IsHugeFunc) {
8524// Try and convert
8525// %c = icmp ult %x, 8
8526// br %c, bla, blb
8527// %tc = lshr %x, 3
8528// to
8529// %tc = lshr %x, 3
8530// %c = icmp eq %tc, 0
8531// br %c, bla, blb
8532// Creating the cmp to zero can be better for the backend, especially if the
8533// lshr produces flags that can be used automatically.
8534if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8535returnfalse;
8536
8537ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8538if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8539returnfalse;
8540
8541Value *X = Cmp->getOperand(0);
8542APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8543
8544for (auto *U :X->users()) {
8545Instruction *UI = dyn_cast<Instruction>(U);
8546// A quick dominance check
8547if (!UI ||
8548 (UI->getParent() != Branch->getParent() &&
8549 UI->getParent() != Branch->getSuccessor(0) &&
8550 UI->getParent() != Branch->getSuccessor(1)) ||
8551 (UI->getParent() != Branch->getParent() &&
8552 !UI->getParent()->getSinglePredecessor()))
8553continue;
8554
8555if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8556match(UI,m_Shr(m_Specific(X),m_SpecificInt(CmpC.logBase2())))) {
8557IRBuilder<> Builder(Branch);
8558if (UI->getParent() != Branch->getParent())
8559 UI->moveBefore(Branch->getIterator());
8560 UI->dropPoisonGeneratingFlags();
8561Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8562 ConstantInt::get(UI->getType(), 0));
8563LLVM_DEBUG(dbgs() <<"Converting " << *Cmp <<"\n");
8564LLVM_DEBUG(dbgs() <<" to compare on zero: " << *NewCmp <<"\n");
8565replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8566returntrue;
8567 }
8568if (Cmp->isEquality() &&
8569 (match(UI,m_Add(m_Specific(X),m_SpecificInt(-CmpC))) ||
8570match(UI,m_Sub(m_Specific(X),m_SpecificInt(CmpC))))) {
8571IRBuilder<> Builder(Branch);
8572if (UI->getParent() != Branch->getParent())
8573 UI->moveBefore(Branch->getIterator());
8574 UI->dropPoisonGeneratingFlags();
8575Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8576 ConstantInt::get(UI->getType(), 0));
8577LLVM_DEBUG(dbgs() <<"Converting " << *Cmp <<"\n");
8578LLVM_DEBUG(dbgs() <<" to compare on zero: " << *NewCmp <<"\n");
8579replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8580returntrue;
8581 }
8582 }
8583returnfalse;
8584}
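// Illustrative example of the second (equality) pattern handled above
// (hypothetical IR, not from the original source):
//  %c = icmp eq i32 %x, 5
//  br i1 %c, label %bla, label %blb
//  ...
//  %s = sub i32 %x, 5
// becomes
//  %s = sub i32 %x, 5
//  %c = icmp eq i32 %s, 0
//  br i1 %c, label %bla, label %blb
// letting the backend reuse the flags produced by the subtract for the branch.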
8585
8586bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8587bool AnyChange =false;
8588 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8589
8590// Bail out if we inserted the instruction to prevent optimizations from
8591// stepping on each other's toes.
8592if (InsertedInsts.count(I))
8593return AnyChange;
8594
8595// TODO: Move into the switch on opcode below here.
8596if (PHINode *P = dyn_cast<PHINode>(I)) {
8597// It is possible for very late stage optimizations (such as SimplifyCFG)
8598// to introduce PHI nodes too late to be cleaned up. If we detect such a
8599// trivial PHI, go ahead and zap it here.
8600if (Value *V =simplifyInstruction(P, {*DL, TLInfo})) {
8601 LargeOffsetGEPMap.erase(P);
8602replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8603P->eraseFromParent();
8604 ++NumPHIsElim;
8605returntrue;
8606 }
8607return AnyChange;
8608 }
8609
8610if (CastInst *CI = dyn_cast<CastInst>(I)) {
8611// If the source of the cast is a constant, then this should have
8612// already been constant folded. The only reason NOT to constant fold
8613// it is if something (e.g. LSR) was careful to place the constant
8614// evaluation in a block other than the one that uses it (e.g. to hoist
8615// the address of globals out of a loop). If this is the case, we don't
8616// want to forward-subst the cast.
8617if (isa<Constant>(CI->getOperand(0)))
8618return AnyChange;
8619
8620if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8621returntrue;
8622
8623if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8624 isa<TruncInst>(I)) &&
8625 TLI->optimizeExtendOrTruncateConversion(
8626I, LI->getLoopFor(I->getParent()), *TTI))
8627returntrue;
8628
8629if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8630 /// Sink a zext or sext into its user blocks if the target type doesn't
8631 /// fit in one register
8632if (TLI->getTypeAction(CI->getContext(),
8633 TLI->getValueType(*DL, CI->getType())) ==
8634 TargetLowering::TypeExpandInteger) {
8635returnSinkCast(CI);
8636 }else {
8637if (TLI->optimizeExtendOrTruncateConversion(
8638I, LI->getLoopFor(I->getParent()), *TTI))
8639returntrue;
8640
8641bool MadeChange = optimizeExt(I);
8642return MadeChange | optimizeExtUses(I);
8643 }
8644 }
8645return AnyChange;
8646 }
8647
8648if (auto *Cmp = dyn_cast<CmpInst>(I))
8649if (optimizeCmp(Cmp, ModifiedDT))
8650returntrue;
8651
8652if (match(I,m_URem(m_Value(),m_Value())))
8653if (optimizeURem(I))
8654returntrue;
8655
8656if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8657 LI->setMetadata(LLVMContext::MD_invariant_group,nullptr);
8658boolModified = optimizeLoadExt(LI);
8659unsigned AS = LI->getPointerAddressSpace();
8660Modified |= optimizeMemoryInst(I,I->getOperand(0), LI->getType(), AS);
8661returnModified;
8662 }
8663
8664if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8665if (splitMergedValStore(*SI, *DL, *TLI))
8666returntrue;
8667SI->setMetadata(LLVMContext::MD_invariant_group,nullptr);
8668unsigned AS =SI->getPointerAddressSpace();
8669return optimizeMemoryInst(I,SI->getOperand(1),
8670SI->getOperand(0)->getType(), AS);
8671 }
8672
8673if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8674unsigned AS = RMW->getPointerAddressSpace();
8675return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8676 }
8677
8678if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8679unsigned AS = CmpX->getPointerAddressSpace();
8680return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8681 CmpX->getCompareOperand()->getType(), AS);
8682 }
8683
8684BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8685
8686if (BinOp && BinOp->getOpcode() == Instruction::And &&EnableAndCmpSinking &&
8687sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8688returntrue;
8689
8690// TODO: Move this into the switch on opcode - it handles shifts already.
8691if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8692 BinOp->getOpcode() == Instruction::LShr)) {
8693ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8694if (CI && TLI->hasExtractBitsInsn())
8695if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8696returntrue;
8697 }
8698
8699if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8700if (GEPI->hasAllZeroIndices()) {
8701 /// The GEP operand must be a pointer, so must its result -> BitCast
8702Instruction *NC =newBitCastInst(GEPI->getOperand(0), GEPI->getType(),
8703 GEPI->getName(), GEPI->getIterator());
8704NC->setDebugLoc(GEPI->getDebugLoc());
8705replaceAllUsesWith(GEPI,NC, FreshBBs, IsHugeFunc);
8706RecursivelyDeleteTriviallyDeadInstructions(
8707 GEPI, TLInfo,nullptr,
8708 [&](Value *V) { removeAllAssertingVHReferences(V); });
8709 ++NumGEPsElim;
8710 optimizeInst(NC, ModifiedDT);
8711returntrue;
8712 }
8713if (tryUnmergingGEPsAcrossIndirectBr(GEPI,TTI)) {
8714returntrue;
8715 }
8716 }
8717
8718if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8719// freeze(icmp a, const)) -> icmp (freeze a), const
8720// This helps generate efficient conditional jumps.
8721Instruction *CmpI =nullptr;
8722if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8723 CmpI =II;
8724elseif (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8725 CmpI =F->getFastMathFlags().none() ?F :nullptr;
8726
8727if (CmpI && CmpI->hasOneUse()) {
8728auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8729bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8730 isa<ConstantPointerNull>(Op0);
8731bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8732 isa<ConstantPointerNull>(Op1);
8733if (Const0 || Const1) {
8734if (!Const0 || !Const1) {
8735auto *F =newFreezeInst(Const0 ? Op1 : Op0,"", CmpI->getIterator());
8736F->takeName(FI);
8737 CmpI->setOperand(Const0 ? 1 : 0,F);
8738 }
8739replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8740 FI->eraseFromParent();
8741returntrue;
8742 }
8743 }
8744return AnyChange;
8745 }
8746
8747if (tryToSinkFreeOperands(I))
8748returntrue;
8749
8750switch (I->getOpcode()) {
8751case Instruction::Shl:
8752case Instruction::LShr:
8753case Instruction::AShr:
8754return optimizeShiftInst(cast<BinaryOperator>(I));
8755case Instruction::Call:
8756returnoptimizeCallInst(cast<CallInst>(I), ModifiedDT);
8757case Instruction::Select:
8758return optimizeSelectInst(cast<SelectInst>(I));
8759case Instruction::ShuffleVector:
8760return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8761case Instruction::Switch:
8762return optimizeSwitchInst(cast<SwitchInst>(I));
8763case Instruction::ExtractElement:
8764return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8765case Instruction::Br:
8766returnoptimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8767 }
8768
8769return AnyChange;
8770}
8771
8772/// Given an OR instruction, check to see if this is a bitreverse
8773/// idiom. If so, insert the new intrinsic and return true.
8774bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8775if (!I.getType()->isIntegerTy() ||
8776 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8777 TLI->getValueType(*DL,I.getType(),true)))
8778returnfalse;
8779
8780SmallVector<Instruction *, 4> Insts;
8781if (!recognizeBSwapOrBitReverseIdiom(&I,false,true, Insts))
8782returnfalse;
8783Instruction *LastInst = Insts.back();
8784replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8785RecursivelyDeleteTriviallyDeadInstructions(
8786 &I, TLInfo,nullptr,
8787 [&](Value *V) { removeAllAssertingVHReferences(V); });
8788returntrue;
8789}
8790
8791// In this pass we look for GEP and cast instructions that are used
8792// across basic blocks and rewrite them to improve basic-block-at-a-time
8793// selection.
8794bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8795 SunkAddrs.clear();
8796bool MadeChange =false;
8797
8798do {
8799 CurInstIterator = BB.begin();
8800 ModifiedDT = ModifyDT::NotModifyDT;
8801while (CurInstIterator != BB.end()) {
8802 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8803if (ModifiedDT != ModifyDT::NotModifyDT) {
8804// For huge functions we tend to quickly go through the inner optimization
8805// opportunities in the BB. So we go back to the BB head to re-optimize
8806// each instruction instead of going back to the function head.
8807if (IsHugeFunc) {
8808 DT.reset();
8809 getDT(*BB.getParent());
8810break;
8811 }else {
8812returntrue;
8813 }
8814 }
8815 }
8816 }while (ModifiedDT == ModifyDT::ModifyInstDT);
8817
8818bool MadeBitReverse =true;
8819while (MadeBitReverse) {
8820 MadeBitReverse =false;
8821for (auto &I :reverse(BB)) {
8822if (makeBitReverse(I)) {
8823 MadeBitReverse = MadeChange =true;
8824break;
8825 }
8826 }
8827 }
8828 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8829
8830return MadeChange;
8831}
8832
8833// Some CGP optimizations may move or alter what's computed in a block. Check
8834// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8835bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8836assert(isa<DbgValueInst>(I));
8837DbgValueInst &DVI = *cast<DbgValueInst>(I);
8838
8839// Does this dbg.value refer to a sunk address calculation?
8840bool AnyChange =false;
8841SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8842 DVI.location_ops().end());
8843for (Value *Location : LocationOps) {
8844WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8845Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH :nullptr;
8846if (SunkAddr) {
8847// Point dbg.value at locally computed address, which should give the best
8848// opportunity to be accurately lowered. This update may change the type
8849// of pointer being referred to; however this makes no difference to
8850// debugging information, and we can't generate bitcasts that may affect
8851// codegen.
8852 DVI.replaceVariableLocationOp(Location, SunkAddr);
8853 AnyChange =true;
8854 }
8855 }
8856return AnyChange;
8857}
8858
8859bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8860bool AnyChange =false;
8861for (DbgVariableRecord &DVR :filterDbgVars(I.getDbgRecordRange()))
8862 AnyChange |= fixupDbgVariableRecord(DVR);
8863return AnyChange;
8864}
8865
8866// FIXME: should updating debug-info really cause the "changed" flag to fire,
8867// which can cause a function to be reprocessed?
8868bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8869if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8870 DVR.Type != DbgVariableRecord::LocationType::Assign)
8871returnfalse;
8872
8873// Does this DbgVariableRecord refer to a sunk address calculation?
8874bool AnyChange =false;
8875SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8876 DVR.location_ops().end());
8877for (Value *Location : LocationOps) {
8878WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8879Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH :nullptr;
8880if (SunkAddr) {
8881// Point dbg.value at locally computed address, which should give the best
8882// opportunity to be accurately lowered. This update may change the type
8883// of pointer being referred to; however this makes no difference to
8884// debugging information, and we can't generate bitcasts that may affect
8885// codegen.
8886 DVR.replaceVariableLocationOp(Location, SunkAddr);
8887 AnyChange =true;
8888 }
8889 }
8890return AnyChange;
8891}
8892
8893staticvoidDbgInserterHelper(DbgValueInst *DVI,BasicBlock::iterator VI) {
8894 DVI->removeFromParent();
8895if (isa<PHINode>(VI))
8896 DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
8897else
8898 DVI->insertAfter(VI);
8899}
8900
8901staticvoidDbgInserterHelper(DbgVariableRecord *DVR,BasicBlock::iterator VI) {
8902 DVR->removeFromParent();
8903BasicBlock *VIBB = VI->getParent();
8904if (isa<PHINode>(VI))
8905 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8906else
8907 VIBB->insertDbgRecordAfter(DVR, &*VI);
8908}
8909
8910// A llvm.dbg.value may be using a value before its definition, due to
8911// optimizations in this pass and others. Scan for such dbg.values, and rescue
8912// them by moving the dbg.value to immediately after the value definition.
8913// FIXME: Ideally this should never be necessary, and this has the potential
8914// to re-order dbg.value intrinsics.
8915bool CodeGenPrepare::placeDbgValues(Function &F) {
8916bool MadeChange =false;
8917DominatorTree DT(F);
8918
8919auto DbgProcessor = [&](auto *DbgItem,Instruction *Position) {
8920SmallVector<Instruction *, 4> VIs;
8921for (Value *V : DbgItem->location_ops())
8922if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8923 VIs.push_back(VI);
8924
8925// This item may depend on multiple instructions, complicating any
8926// potential sink. This block takes the defensive approach, opting to
8927// "undef" the item if it has more than one instruction and any of them do
8928// not dominate iem.
8929for (Instruction *VI : VIs) {
8930if (VI->isTerminator())
8931continue;
8932
8933// If VI is a phi in a block with an EHPad terminator, we can't insert
8934// after it.
8935if (isa<PHINode>(VI) &&VI->getParent()->getTerminator()->isEHPad())
8936continue;
8937
8938// If the defining instruction dominates the dbg.value, we do not need
8939// to move the dbg.value.
8940if (DT.dominates(VI, Position))
8941continue;
8942
8943// If we depend on multiple instructions and any of them doesn't
8944// dominate this DVI, we probably can't salvage it: moving it to
8945// after any of the instructions could cause us to lose the others.
8946if (VIs.size() > 1) {
8947LLVM_DEBUG(
8948dbgs()
8949 <<"Unable to find valid location for Debug Value, undefing:\n"
8950 << *DbgItem);
8951 DbgItem->setKillLocation();
8952break;
8953 }
8954
8955LLVM_DEBUG(dbgs() <<"Moving Debug Value before :\n"
8956 << *DbgItem <<' ' << *VI);
8957DbgInserterHelper(DbgItem,VI->getIterator());
8958 MadeChange =true;
8959 ++NumDbgValueMoved;
8960 }
8961 };
8962
8963for (BasicBlock &BB :F) {
8964for (Instruction &Insn :llvm::make_early_inc_range(BB)) {
8965// Process dbg.value intrinsics.
8966DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8967if (DVI) {
8968 DbgProcessor(DVI, DVI);
8969continue;
8970 }
8971
8972// If this isn't a dbg.value, process any attached DbgVariableRecord
8973// records attached to this instruction.
8974for (DbgVariableRecord &DVR :llvm::make_early_inc_range(
8975filterDbgVars(Insn.getDbgRecordRange()))) {
8976if (DVR.Type != DbgVariableRecord::LocationType::Value)
8977continue;
8978 DbgProcessor(&DVR, &Insn);
8979 }
8980 }
8981 }
8982
8983return MadeChange;
8984}
8985
8986// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8987// probes can be chained dependencies of other regular DAG nodes and block DAG
8988// combine optimizations.
8989bool CodeGenPrepare::placePseudoProbes(Function &F) {
8990bool MadeChange =false;
8991for (auto &Block :F) {
8992// Move the remaining probes to the beginning of the block.
8993auto FirstInst =Block.getFirstInsertionPt();
8994while (FirstInst !=Block.end() && FirstInst->isDebugOrPseudoInst())
8995 ++FirstInst;
8996BasicBlock::iteratorI(FirstInst);
8997I++;
8998while (I !=Block.end()) {
8999if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9000II->moveBefore(FirstInst);
9001 MadeChange =true;
9002 }
9003 }
9004 }
9005return MadeChange;
9006}
9007
9008/// Scale down both weights to fit into uint32_t.
9009staticvoidscaleWeights(uint64_t &NewTrue,uint64_t &NewFalse) {
9010uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9011uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9012 NewTrue = NewTrue / Scale;
9013 NewFalse = NewFalse / Scale;
9014}
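// Worked example (illustrative): NewTrue = 10,000,000,000 and
// NewFalse = 5,000,000,000 give NewMax = 10,000,000,000, so
// Scale = 10,000,000,000 / 4,294,967,295 + 1 = 3, and the weights become
// 3,333,333,333 and 1,666,666,666 -- both fit in uint32_t and the ~2:1
// ratio is preserved.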

/// Some targets prefer to split a conditional branch like:
/// \code
///   %0 = icmp ne i32 %a, 0
///   %1 = icmp ne i32 %b, 0
///   %or.cond = or i1 %0, %1
///   br i1 %or.cond, label %TrueBB, label %FalseBB
/// \endcode
/// into multiple branch instructions like:
/// \code
///   bb1:
///     %0 = icmp ne i32 %a, 0
///     br i1 %0, label %TrueBB, label %bb2
///   bb2:
///     %1 = icmp ne i32 %b, 0
///     br i1 %1, label %TrueBB, label %FalseBB
/// \endcode
/// This usually allows instruction selection to do even further optimizations
/// and combine the compare with the branch instruction. Currently this is
/// applied for targets which have "cheap" jump instructions.
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
  if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
    return false;

  bool MadeChange = false;
  for (auto &BB : F) {
    // Does this BB end with the following?
    //   %cond1 = icmp|fcmp|binary instruction ...
    //   %cond2 = icmp|fcmp|binary instruction ...
    //   %cond.or = or|and i1 %cond1, %cond2
    //   br i1 %cond.or label %dest1, label %dest2
    Instruction *LogicOp;
    BasicBlock *TBB, *FBB;
    if (!match(BB.getTerminator(),
               m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
      continue;

    auto *Br1 = cast<BranchInst>(BB.getTerminator());
    if (Br1->getMetadata(LLVMContext::MD_unpredictable))
      continue;

    // The merging of mostly empty BBs can cause a degenerate branch.
    if (TBB == FBB)
      continue;

    unsigned Opc;
    Value *Cond1, *Cond2;
    if (match(LogicOp,
              m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
      Opc = Instruction::And;
    else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
                                        m_OneUse(m_Value(Cond2)))))
      Opc = Instruction::Or;
    else
      continue;

    auto IsGoodCond = [](Value *Cond) {
      return match(
          Cond,
          m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                           m_LogicalOr(m_Value(), m_Value()))));
    };
    if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
      continue;

    LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());

    // Create a new BB.
    auto *TmpBB =
        BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
                           BB.getParent(), BB.getNextNode());
    if (IsHugeFunc)
      FreshBBs.insert(TmpBB);

    // Update the original basic block by using the first condition directly
    // in the branch instruction and removing the no longer needed and/or
    // instruction.
    Br1->setCondition(Cond1);
    LogicOp->eraseFromParent();

    // Depending on the condition we have to either replace the true or the
    // false successor of the original branch instruction.
    if (Opc == Instruction::And)
      Br1->setSuccessor(0, TmpBB);
    else
      Br1->setSuccessor(1, TmpBB);

    // Fill in the new basic block.
    auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
    if (auto *I = dyn_cast<Instruction>(Cond2)) {
      I->removeFromParent();
      I->insertBefore(Br2->getIterator());
    }

    // Update PHI nodes in both successors. The original BB needs to be
    // replaced in one successor's PHI nodes, because the branch now comes
    // from the newly generated BB (NewBB). In the other successor we need to
    // add one incoming edge to the PHI nodes, because both branch
    // instructions now target the same successor. Depending on the original
    // branch condition (and/or) we have to swap the successors (TrueDest,
    // FalseDest), so that we perform the correct update for the PHI nodes.
    // This doesn't change the successor order of the just created branch
    // instruction (or any other instruction).
    if (Opc == Instruction::Or)
      std::swap(TBB, FBB);

    // Replace the old BB with the new BB.
    TBB->replacePhiUsesWith(&BB, TmpBB);

    // Add another incoming edge from the new BB.
    for (PHINode &PN : FBB->phis()) {
      auto *Val = PN.getIncomingValueForBlock(&BB);
      PN.addIncoming(Val, TmpBB);
    }

    // Update the branch weights (from SelectionDAGBuilder::
    // FindMergedConditions).
    if (Opc == Instruction::Or) {
      // Codegen X | Y as:
      // BB1:
      //   jmp_if_X TBB
      //   jmp TmpBB
      // TmpBB:
      //   jmp_if_Y TBB
      //   jmp FBB
      //

      // We have flexibility in setting Prob for BB1 and Prob for NewBB.
      // The requirement is that
      //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
      //     = TrueProb for original BB.
      // Assuming the original weights are A and B, one choice is to set BB1's
      // weights to A and A+2B, and set TmpBB's weights to A and 2B. This
      // choice assumes that
      //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
      // Another choice is to assume TrueProb for BB1 equals TrueProb for
      // TmpBB, but the math is more complicated.
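      // Worked example (comment only): with original weights A = 3 and B = 1,
      // the original TrueProb is 3/4. BB1 then gets weights {3, 3 + 2*1} =
      // {3, 5} and TmpBB gets {3, 2}, so
      //   TrueProb for BB1 + FalseProb for BB1 * TrueProb for TmpBB
      //     = 3/8 + (5/8) * (3/5) = 3/4,
      // which matches the original TrueProb as required.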
      uint64_t TrueWeight, FalseWeight;
      if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
        uint64_t NewTrueWeight = TrueWeight;
        uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
        scaleWeights(NewTrueWeight, NewFalseWeight);
        Br1->setMetadata(LLVMContext::MD_prof,
                         MDBuilder(Br1->getContext())
                             .createBranchWeights(TrueWeight, FalseWeight,
                                                  hasBranchWeightOrigin(*Br1)));

        NewTrueWeight = TrueWeight;
        NewFalseWeight = 2 * FalseWeight;
        scaleWeights(NewTrueWeight, NewFalseWeight);
        Br2->setMetadata(LLVMContext::MD_prof,
                         MDBuilder(Br2->getContext())
                             .createBranchWeights(TrueWeight, FalseWeight));
      }
    } else {
      // Codegen X & Y as:
      // BB1:
      //   jmp_if_X TmpBB
      //   jmp FBB
      // TmpBB:
      //   jmp_if_Y TBB
      //   jmp FBB
      //
      // This requires creation of TmpBB after CurBB.

      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
      // The requirement is that
      //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
      //     = FalseProb for original BB.
      // Assuming the original weights are A and B, one choice is to set BB1's
      // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This
      // choice assumes that
      //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
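      // Worked example (comment only): with original weights A = 3 and B = 1,
      // the original FalseProb is 1/4. BB1 then gets weights {2*3 + 1, 1} =
      // {7, 1} and TmpBB gets {6, 1}, so
      //   FalseProb for BB1 + TrueProb for BB1 * FalseProb for TmpBB
      //     = 1/8 + (7/8) * (1/7) = 1/4,
      // which matches the original FalseProb as required.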
      uint64_t TrueWeight, FalseWeight;
      if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
        uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
        uint64_t NewFalseWeight = FalseWeight;
        scaleWeights(NewTrueWeight, NewFalseWeight);
        Br1->setMetadata(LLVMContext::MD_prof,
                         MDBuilder(Br1->getContext())
                             .createBranchWeights(TrueWeight, FalseWeight));

        NewTrueWeight = 2 * TrueWeight;
        NewFalseWeight = FalseWeight;
        scaleWeights(NewTrueWeight, NewFalseWeight);
        Br2->setMetadata(LLVMContext::MD_prof,
                         MDBuilder(Br2->getContext())
                             .createBranchWeights(TrueWeight, FalseWeight));
      }
    }

    ModifiedDT = ModifyDT::ModifyBBDT;
    MadeChange = true;

    LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
               TmpBB->dump());
  }
  return MadeChange;
}
Success
#define Success
Definition:AArch64Disassembler.cpp:220
for
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
Definition:AArch64ExpandPseudoInsts.cpp:115
getIntrinsicID
static unsigned getIntrinsicID(const SDNode *N)
Definition:AArch64ISelLowering.cpp:7713
canCombine
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
Definition:AArch64InstrInfo.cpp:6370
Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition:AArch64MIPeepholeOpt.cpp:167
Select
AMDGPU Register Bank Select
Definition:AMDGPURegBankSelect.cpp:71
PHI
Rewrite undef for PHI
Definition:AMDGPURewriteUndefForPHI.cpp:100
APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
ArrayRef.h
PGOMapFeaturesEnum::BBFreq
@ BBFreq
Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...
getParent
static const Function * getParent(const Value *V)
Definition:BasicAliasAnalysis.cpp:863
BasicBlockSectionsProfileReader.h
BasicBlockUtils.h
BlockFrequencyInfo.h
BlockFrequency.h
From
BlockVerifier::State From
Definition:BlockVerifier.cpp:57
BranchProbabilityInfo.h
BranchProbability.h
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
BypassSlowDivision.h
Casting.h
sinkAndCmp0Expression
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
Definition:CodeGenPrepare.cpp:2207
SinkShiftAndTruncate
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
Definition:CodeGenPrepare.cpp:2299
getGEPSmallConstantIntOffsetV
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Definition:CodeGenPrepare.cpp:1236
generation
Optimize for code generation
Definition:CodeGenPrepare.cpp:539
sinkSelectOperand
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
Definition:CodeGenPrepare.cpp:7265
replaceAllUsesWith
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
Definition:CodeGenPrepare.cpp:1112
isExtractBitsCandidateUse
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
Definition:CodeGenPrepare.cpp:2283
MaxAddressUsersToScan
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
OptimizePhiTypes
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
DisableStoreExtract
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
foldFCmpToFPClassTest
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Definition:CodeGenPrepare.cpp:1950
sinkCmpExpression
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
Definition:CodeGenPrepare.cpp:1772
scaleWeights
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
Definition:CodeGenPrepare.cpp:9009
ProfileUnknownInSpecialSection
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
OptimizeExtractBits
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
Definition:CodeGenPrepare.cpp:2392
DisableExtLdPromotion
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
DisablePreheaderProtect
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
AddrSinkCombineBaseOffs
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
OptimizeNoopCopyExpression
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
Definition:CodeGenPrepare.cpp:1465
splitMergedValStore
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
Definition:CodeGenPrepare.cpp:8255
SinkCast
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
Definition:CodeGenPrepare.cpp:1397
swapICmpOperandsToExposeCSEOpportunities
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
Definition:CodeGenPrepare.cpp:1921
AddrSinkCombineBaseReg
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
FindAllMemoryUses
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
Definition:CodeGenPrepare.cpp:5495
StressStoreExtract
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
isFormingBranchFromSelectProfitable
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
Definition:CodeGenPrepare.cpp:7274
getIVIncrement
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
Definition:CodeGenPrepare.cpp:1527
AddrSinkCombineBaseGV
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
AddrSinkUsingGEPs
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
getTrueOrFalseValue
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
Definition:CodeGenPrepare.cpp:7319
DbgInserterHelper
static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI)
Definition:CodeGenPrepare.cpp:8893
DisableBranchOpts
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
EnableTypePromotionMerge
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
adjustIsPower2Test
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, const TargetTransformInfo &TTI, const DataLayout &DL)
Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1.
Definition:CodeGenPrepare.cpp:2155
ProfileGuidedSectionPrefix
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
HugeFuncThresholdInCGPP
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
AddrSinkNewSelects
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
tryUnmergingGEPsAcrossIndirectBr
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
Definition:CodeGenPrepare.cpp:8416
IsOperandAMemoryOperand
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
Definition:CodeGenPrepare.cpp:5470
isIntrinsicOrLFToBeTailCalled
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
Definition:CodeGenPrepare.cpp:2810
ForceSplitStore
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
computeBaseDerivedRelocateMap
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
Definition:CodeGenPrepare.cpp:1201
simplifyRelocatesOffABase
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
Definition:CodeGenPrepare.cpp:1253
AddrSinkCombineScaledReg
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
foldICmpWithDominatingICmp
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
Definition:CodeGenPrepare.cpp:1851
MightBeFoldableInst
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
Definition:CodeGenPrepare.cpp:4613
matchIncrement
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
Definition:CodeGenPrepare.cpp:1508
EnableGEPOffsetSplit
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
DisableComplexAddrModes
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
EnableICMP_EQToICMP_ST
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
VerifyBFIUpdates
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
BBSectionsGuidedSectionPrefix
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
isRemOfLoopIncrementWithLoopInvariant
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
Definition:CodeGenPrepare.cpp:1983
isIVIncrement
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
Definition:CodeGenPrepare.cpp:1542
DisableGCOpts
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
GEPSequentialConstIndexed
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
Definition:CodeGenPrepare.cpp:8353
isPromotedInstructionLegal
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
Definition:CodeGenPrepare.cpp:4644
FreqRatioToSkipMerge
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
optimizeBranch
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
Definition:CodeGenPrepare.cpp:8521
DEBUG_TYPE
#define DEBUG_TYPE
Definition:CodeGenPrepare.cpp:109
IsNonLocalValue
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
Definition:CodeGenPrepare.cpp:5714
EnableAndCmpSinking
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
despeculateCountZeros
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
Definition:CodeGenPrepare.cpp:2494
hasSameExtUse
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
Definition:CodeGenPrepare.cpp:6316
StressExtLdPromotion
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
matchUAddWithOverflowConstantEdgeCases
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
Definition:CodeGenPrepare.cpp:1638
DisableSelectToBranch
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
DisableDeletePHIs
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
foldURemOfLoopIncrement
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Definition:CodeGenPrepare.cpp:2068
AddrSinkNewPhis
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
CodeGenPrepare.h
Defines an IR pass for CodeGen Prepare.
CommandLine.h
Compiler.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition:Compiler.h:622
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition:Compiler.h:282
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
DataLayout.h
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
Debug.h
LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition:Debug.h:106
DenseMap.h
This file defines the DenseMap class.
DerivedTypes.h
Dominators.h
Addr
uint64_t Addr
Definition:ELFObjHandler.cpp:79
Blocks
DenseMap< Block *, BlockRelaxAux > Blocks
Definition:ELF_riscv.cpp:507
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
GetElementPtrTypeIterator.h
GlobalValue.h
GlobalVariable.h
GEP
Hexagon Common GEP
Definition:HexagonCommonGEP.cpp:170
IRBuilder.h
MI
IRTranslator LLVM IR MI
Definition:IRTranslator.cpp:112
Argument.h
BasicBlock.h
Constant.h
Function.h
Instruction.h
IntrinsicInst.h
Module.h
Module.h This file contains the declarations for the Module class.
Operator.h
Type.h
Use.h
This defines the Use class.
User.h
Value.h
ISDOpcodes.h
InitializePasses.h
InlineAsm.h
InstrTypes.h
InstructionSimplify.h
Instructions.h
Intrinsics.h
ReferenceKind::LValue
@ LValue
eraseInstruction
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition:LICM.cpp:1504
LLVMContext.h
LoopDeletionResult::Modified
@ Modified
LoopInfo.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
MDBuilder.h
AddrMode
AddrMode
Definition:MSP430Disassembler.cpp:141
TRI
unsigned const TargetRegisterInfo * TRI
Definition:MachineSink.cpp:2029
MachineValueType.h
MapVector.h
This file implements a map that provides insertion order iteration.
II
uint64_t IntrinsicInst * II
Definition:NVVMIntrRange.cpp:51
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
P
#define P(N)
INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition:PassSupport.h:55
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:57
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition:PassSupport.h:52
Pass.h
PatternMatch.h
PointerIntPair.h
This file defines the PointerIntPair class.
ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.
ProfileSummaryInfo.h
TBB
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
Definition:RISCVRedundantCopyElimination.cpp:76
Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition:RISCVRedundantCopyElimination.cpp:75
dominates
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Definition:RegAllocFast.cpp:485
Uses
Remove Loads Into Fake Uses
Definition:RemoveLoadsIntoFakeUses.cpp:75
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Address
@ Address
Definition:SPIRVEmitNonSemanticDI.cpp:68
STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.
OS
raw_pwrite_stream & OS
Definition:SampleProfWriter.cpp:51
ScalarEvolutionExpressions.h
optimizeBlock
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
Definition:ScalarizeMaskedMemIntrin.cpp:1060
optimizeCallInst
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
Definition:ScalarizeMaskedMemIntrin.cpp:1077
SelectionDAGNodes.h
SimplifyLibCalls.h
SizeOpts.h
SmallPtrSet.h
This file defines the SmallPtrSet class.
SmallVector.h
This file defines the SmallVector class.
IRDumpFileSuffixType::Before
@ Before
Statepoint.h
Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition:Statistic.h:166
getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition:TapiFile.cpp:39
Ptr
@ Ptr
Definition:TargetLibraryInfo.cpp:77
TargetLibraryInfo.h
TargetLowering.h
This file describes how to lower LLVM code to machine code.
TargetOptions.h
DisableSelectOptimize
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.
TargetSubtargetInfo.h
TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.
Local.h
ValueHandle.h
ValueMap.h
getBitWidth
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Definition:ValueTracking.cpp:93
ValueTracking.h
ValueTypes.h
VectorUtils.h
getConstantVector
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Definition:X86ISelLowering.cpp:7419
RHS
Value * RHS
Definition:X86PartialReduction.cpp:74
LHS
Value * LHS
Definition:X86PartialReduction.cpp:73
BaseTy
PointerType
Definition:ItaniumDemangle.h:627
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition:APInt.cpp:986
llvm::APInt::ugt
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition:APInt.h:1182
llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition:APInt.h:380
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition:APInt.h:435
llvm::APInt::getSignificantBits
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition:APInt.h:1511
llvm::APInt::logBase2
unsigned logBase2() const
Definition:APInt.h:1739
llvm::APInt::sext
APInt sext(unsigned width) const
Sign extend to a new width.
Definition:APInt.cpp:959
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition:APInt.h:440
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition:APInt.h:1542
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition:Instructions.h:63
llvm::AllocaInst::isStaticAlloca
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Definition:Instructions.cpp:1234
llvm::AllocaInst::getAlign
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition:Instructions.h:124
llvm::AllocaInst::getAllocatedType
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition:Instructions.h:117
llvm::AllocaInst::setAlignment
void setAlignment(Align Align)
Definition:Instructions.h:128
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition:PassManager.h:253
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition:PassManager.h:429
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition:PassManager.h:410
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition:PassAnalysisSupport.h:47
llvm::AnalysisUsage::addUsedIfAvailable
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
Definition:PassAnalysisSupport.h:117
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition:PassAnalysisSupport.h:75
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::AssertingVH
Value handle that asserts if the Value is deleted.
Definition:ValueHandle.h:264
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition:Instructions.h:501
llvm::AtomicCmpXchgInst::getPointerOperandIndex
static unsigned getPointerOperandIndex()
Definition:Instructions.h:631
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition:Instructions.h:704
llvm::AtomicRMWInst::getPointerOperandIndex
static unsigned getPointerOperandIndex()
Definition:Instructions.h:872
llvm::BasicBlockSectionsProfileReaderAnalysis
Analysis pass providing the BasicBlockSectionsProfileReader.
Definition:BasicBlockSectionsProfileReader.h:165
llvm::BasicBlockSectionsProfileReaderWrapperPass
Definition:BasicBlockSectionsProfileReader.h:178
llvm::BasicBlockSectionsProfileReader
Definition:BasicBlockSectionsProfileReader.h:76
llvm::BasicBlockSectionsProfileReader::isFunctionHot
bool isFunctionHot(StringRef FuncName) const
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::end
iterator end()
Definition:BasicBlock.h:474
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition:BasicBlock.h:461
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition:BasicBlock.h:530
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition:BasicBlock.cpp:437
llvm::BasicBlock::hasAddressTaken
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition:BasicBlock.h:671
llvm::BasicBlock::getFirstNonPHIIt
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition:BasicBlock.cpp:381
llvm::BasicBlock::insertDbgRecordBefore
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
Definition:BasicBlock.cpp:1088
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition:BasicBlock.h:178
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition:BasicBlock.h:213
llvm::BasicBlock::getFirstNonPHIOrDbg
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition:BasicBlock.cpp:398
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition:BasicBlock.cpp:599
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition:BasicBlock.cpp:481
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition:BasicBlock.cpp:489
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition:BasicBlock.cpp:511
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition:BasicBlock.h:220
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition:BasicBlock.cpp:279
llvm::BasicBlock::insertDbgRecordAfter
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
Definition:BasicBlock.cpp:1079
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition:BasicBlock.h:177
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition:BasicBlock.cpp:168
llvm::BasicBlock::IsNewDbgInfoFormat
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition:BasicBlock.h:67
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition:BasicBlock.h:240
llvm::BinaryOperator
Definition:InstrTypes.h:170
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition:InstrTypes.h:370
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition:Instructions.cpp:2639
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition:Instructions.h:4894
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition:BlockFrequencyInfo.h:37
llvm::BlockFrequency
Definition:BlockFrequency.h:26
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition:Instructions.h:3016
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition:Instructions.cpp:1168
llvm::BranchInst::isConditional
bool isConditional() const
Definition:Instructions.h:3090
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition:Instructions.h:3104
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition:Instructions.h:3089
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition:BranchProbabilityInfo.h:112
llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition:BranchProbability.cpp:53
llvm::CallBase::isInlineAsm
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition:InstrTypes.h:1408
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition:InstrTypes.h:1341
llvm::CallBase::hasFnAttr
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition:InstrTypes.h:1451
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition:InstrTypes.h:1286
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition:InstrTypes.h:1291
llvm::CallBase::args
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition:InstrTypes.h:1277
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition:Instructions.h:1479
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition:InstrTypes.h:444
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition:Instructions.cpp:2972
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition:InstrTypes.h:661
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition:InstrTypes.h:673
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition:InstrTypes.h:702
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition:InstrTypes.h:696
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition:InstrTypes.h:700
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition:InstrTypes.h:698
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition:InstrTypes.h:694
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition:InstrTypes.h:695
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition:InstrTypes.h:825
llvm::CmpInst::Create
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Definition:Instructions.cpp:3434
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition:InstrTypes.h:763
llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition:CmpPredicate.h:22
llvm::CodeGenPreparePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition:CodeGenPrepare.cpp:545
llvm::ConstantData
Base class for constants with no operands.
Definition:Constants.h:53
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition:Constants.h:1108
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition:Constants.cpp:2321
llvm::ConstantExpr::getNeg
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition:Constants.cpp:2626
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition:Constants.h:83
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition:Constants.cpp:866
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition:Constants.h:208
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition:Constants.h:163
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition:Constants.h:148
llvm::ConstantVector::getSplat
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition:Constants.cpp:1472
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition:Constants.cpp:1421
llvm::Constant
This is an important base class in LLVM.
Definition:Constant.h:42
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition:Constants.cpp:420
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition:Constants.cpp:373
llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition:DWARFExpression.h:32
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition:DataLayout.h:63
llvm::DataLayout::getIntPtrType
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition:DataLayout.cpp:851
llvm::DbgRecord::removeFromParent
void removeFromParent()
Definition:DebugProgramInstruction.cpp:674
llvm::DbgValueInst
This represents the llvm.dbg.value instruction.
Definition:IntrinsicInst.h:468
llvm::DbgVariableIntrinsic::location_ops
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
Definition:IntrinsicInst.cpp:91
llvm::DbgVariableIntrinsic::replaceVariableLocationOp
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Definition:IntrinsicInst.cpp:121
llvm::DbgVariableRecord
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Definition:DebugProgramInstruction.h:270
llvm::DbgVariableRecord::Type
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
Definition:DebugProgramInstruction.h:286
llvm::DbgVariableRecord::replaceVariableLocationOp
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Definition:DebugProgramInstruction.cpp:286
llvm::DbgVariableRecord::location_ops
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
Definition:DebugProgramInstruction.cpp:234
llvm::DebugLoc
A debug info location.
Definition:DebugLoc.h:33
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition:DenseMap.h:156
llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition:DenseMap.h:321
llvm::DenseMapBase::size
unsigned size() const
Definition:DenseMap.h:99
llvm::DenseMapBase::end
iterator end()
Definition:DenseMap.h:84
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition:DenseMap.h:211
llvm::DenseMapBase::clear
void clear()
Definition:DenseMap.h:110
llvm::DenseMap
Definition:DenseMap.h:727
llvm::DominatorTreeBase::reset
void reset()
Definition:GenericDomTree.h:909
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition:Dominators.h:162
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition:Dominators.cpp:321
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition:Dominators.cpp:122
llvm::ElementCount
Definition:TypeSize.h:300
llvm::ExtractValueInst
This instruction extracts a struct member or array element value from an aggregate value.
Definition:Instructions.h:2397
llvm::ExtractValueInst::indices
iterator_range< idx_iterator > indices() const
Definition:Instructions.h:2435
llvm::FCmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition:Instructions.h:1379
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition:Type.cpp:791
llvm::FortifiedLibCallSimplifier
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
Definition:SimplifyLibCalls.h:41
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition:Instructions.h:5088
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition:Pass.h:310
llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
llvm::Function
Definition:Function.h:63
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition:Function.h:809
llvm::GCProjectionInst::getStatepoint
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Definition:IntrinsicInst.cpp:842
llvm::GCRelocateInst
Represents calls to the gc.relocate intrinsic.
Definition:IntrinsicInst.h:1802
llvm::GCRelocateInst::getBasePtrIndex
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
Definition:IntrinsicInst.h:1815
llvm::GCStatepointInst
Represents a gc.statepoint intrinsic call.
Definition:Statepoint.h:61
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition:Instructions.h:933
llvm::GetElementPtrInst::getIndexedType
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
Definition:Instructions.cpp:1514
llvm::GlobalObject::setAlignment
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition:Globals.cpp:143
llvm::GlobalObject::canIncreaseAlignment
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition:Globals.cpp:310
llvm::GlobalValue
Definition:GlobalValue.h:48
llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition:GlobalValue.h:264
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition:GlobalValue.h:297
llvm::GlobalVariable
Definition:GlobalVariable.h:39
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition:Instructions.h:1158
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition:Instructions.h:1291
llvm::IRBuilderBase::CreateZExtOrBitCast
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition:IRBuilder.h:2162
llvm::IRBuilderBase::getTrue
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition:IRBuilder.h:485
llvm::IRBuilderBase::CreateSelect
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition:IRBuilder.cpp:1053
llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition:IRBuilder.h:2574
llvm::IRBuilderBase::SetCurrentDebugLocation
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition:IRBuilder.h:239
llvm::IRBuilderBase::CreateNUWAdd
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Definition:IRBuilder.h:1383
llvm::IRBuilderBase::CreateGEP
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition:IRBuilder.h:1874
llvm::IRBuilderBase::createIsFPClass
Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition:IRBuilder.cpp:1248
llvm::IRBuilderBase::CreateCmp
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition:IRBuilder.h:2404
llvm::IRBuilderBase::CreatePHI
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition:IRBuilder.h:2435
llvm::IRBuilderBase::CreateICmpEQ
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition:IRBuilder.h:2270
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition:IRBuilder.h:2152
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition:IRBuilder.h:1164
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition:IRBuilder.h:199
llvm::IRBuilderBase::CreateAlignedStore
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition:IRBuilder.h:1834
llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition:IRBuilder.h:2380
llvm::IRBuilderBase::getInt
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition:IRBuilder.h:521
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition:IRBuilder.h:2705
llvm::InlineAsm
Definition:InlineAsm.h:34
llvm::InlineAsm::isInput
@ isInput
Definition:InlineAsm.h:96
llvm::InstructionCost
Definition:InstructionCost.h:29
llvm::Instruction
Definition:Instruction.h:68
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition:Instruction.cpp:1364
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition:Instruction.cpp:80
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition:Instruction.cpp:386
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition:Instruction.cpp:99
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition:Instruction.h:511
llvm::Instruction::getPrevNonDebugInstruction
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
Definition:Instruction.cpp:1234
llvm::Instruction::moveAfter
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition:Instruction.cpp:191
llvm::Instruction::hasMetadata
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition:Instruction.h:404
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition:Instruction.h:869
llvm::Instruction::eraseFromParent
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition:Instruction.cpp:94
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition:Instruction.h:169
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition:Instruction.cpp:72
llvm::Instruction::comesBefore
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
Definition:Instruction.cpp:334
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition:Metadata.cpp:1679
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition:Instruction.h:310
llvm::Instruction::BinaryOps
BinaryOps
Definition:Instruction.h:1008
llvm::Instruction::isShift
bool isShift() const
Definition:Instruction.h:318
llvm::Instruction::dropPoisonGeneratingFlags
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition:Instruction.cpp:426
llvm::Instruction::getDbgReinsertionPosition
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
Definition:Instruction.cpp:267
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition:Instruction.h:508
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition:Instruction.cpp:1345
llvm::Instruction::insertAfter
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Definition:Instruction.cpp:111
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition:Instruction.cpp:175
llvm::Instruction::CastOps
CastOps
Definition:Instruction.h:1022
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition:IntrinsicInst.h:48
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition:IntrinsicInst.h:55
llvm::InvokeInst
Invoke instruction.
Definition:Instructions.h:3670
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition:LLVMContext.h:67
llvm::LoadInst
An instruction for reading from memory.
Definition:Instructions.h:176
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition:Instructions.h:261
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition:LoopInfo.h:566
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
Definition:GenericLoopInfo.h:606
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition:LoopInfo.h:593
llvm::LoopInfo
Definition:LoopInfo.h:407
llvm::Loop
Represents a single loop in the control flow graph.
Definition:LoopInfo.h:39
llvm::MDBuilder
Definition:MDBuilder.h:36
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition:MDBuilder.cpp:37
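As a rough illustration of the two MDBuilder entries above, the sketch below attaches branch-weight profile metadata to an existing conditional branch. The 90/10 weights and the helper name are illustrative assumptions, not taken from CodeGenPrepare itself.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Illustrative helper: mark a conditional branch as strongly biased towards
// its true successor. The 90/10 split is an arbitrary example.
static void annotateLikelyTaken(BranchInst *Br) {
  MDBuilder MDB(Br->getContext());
  MDNode *Weights = MDB.createBranchWeights(/*TrueWeight=*/90,
                                            /*FalseWeight=*/10);
  Br->setMetadata(LLVMContext::MD_prof, Weights);
}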
llvm::MVT
Machine Value Type.
Definition:MachineValueType.h:35
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition:MachineValueType.h:308
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition:MachineValueType.h:441
llvm::MachineBasicBlock::replacePhiUsesWith
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition:MachineBasicBlock.cpp:1503
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition:MapVector.h:36
llvm::MapVector::end
iterator end()
Definition:MapVector.h:71
llvm::MapVector::erase
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition:MapVector.h:193
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition:MapVector.h:167
llvm::MapVector::empty
bool empty() const
Definition:MapVector.h:79
llvm::MapVector::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition:MapVector.h:141
llvm::MapVector::clear
void clear()
Definition:MapVector.h:88
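A minimal sketch of how the MapVector operations listed above fit together; the function name and the use of StringRef keys are illustrative assumptions. The point of MapVector over DenseMap is that iteration visits entries in insertion order.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Record the index at which each name was first seen, then print the entries
// in a deterministic (insertion) order.
static void recordFirstOccurrences(ArrayRef<StringRef> Names) {
  MapVector<StringRef, unsigned> FirstIndex;
  for (unsigned I = 0, E = Names.size(); I != E; ++I)
    FirstIndex.insert({Names[I], I}); // insert() ignores repeated keys
  for (const auto &Entry : FirstIndex)
    errs() << Entry.first << " -> " << Entry.second << "\n";
}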
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition:IntrinsicInst.h:1205
llvm::MemTransferInst
This class wraps the llvm.memcpy/memmove intrinsics.
Definition:IntrinsicInst.h:1302
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition:PassManager.h:692
llvm::PHINode
Definition:Instructions.h:2600
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition:Instructions.h:2735
llvm::PHINode::incoming_values
op_range incoming_values()
Definition:Instructions.h:2665
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition:Instructions.h:2775
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition:Instructions.h:2695
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
Definition:Instructions.h:2675
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition:Instructions.h:2671
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition:Instructions.h:2635
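Putting the PHINode entries above together, here is a minimal sketch that merges two values at a join block. All of the arguments are assumed to come from the surrounding transform; the name "merged" is illustrative.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Build "%merged = phi <ty> [ %A, %Left ], [ %B, %Right ]" at the top of
// MergeBB. A and B must have the same type and dominate their incoming edges.
static PHINode *buildMergePhi(Value *A, Value *B, BasicBlock *Left,
                              BasicBlock *Right, BasicBlock *MergeBB) {
  PHINode *PN = PHINode::Create(A->getType(), /*NumReservedValues=*/2,
                                "merged", MergeBB->begin());
  PN->addIncoming(A, Left);
  PN->addIncoming(B, Right);
  return PN;
}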
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition:PassRegistry.cpp:24
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition:Pass.cpp:98
llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition:Pass.cpp:81
llvm::PointerIntPair
PointerIntPair - This class implements a pair of a pointer and small integer.
Definition:PointerIntPair.h:80
llvm::PoisonValue
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition:Constants.h:1460
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition:Constants.cpp:1878
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition:Analysis.h:111
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition:Analysis.h:117
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition:Analysis.h:131
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition:ProfileSummaryInfo.h:372
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition:ProfileSummaryInfo.h:353
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition:ProfileSummaryInfo.h:41
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition:Instructions.h:2938
llvm::ReturnInst::getReturnValue
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Definition:Instructions.h:2980
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition:SelectionDAGNodes.h:145
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition:Instructions.h:1657
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition:Instructions.h:1682
llvm::SetVector
A vector that has set insertion semantics.
Definition:SetVector.h:57
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition:SetVector.h:264
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition:SetVector.h:93
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition:SetVector.h:162
llvm::SetVector::pop_back_val
value_type pop_back_val()
Definition:SetVector.h:285
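The SetVector entries above are typically used as a deduplicating worklist. The sketch below walks the operand tree of an instruction; the helper name and the inline sizes are illustrative assumptions.

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Collect Root and everything transitively reachable through its operands.
// SetVector provides deduplication (insert returns false on repeats) while
// keeping a stable visitation order.
static void collectOperandTree(Instruction *Root,
                               SmallSetVector<Instruction *, 16> &Seen) {
  SmallVector<Instruction *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Seen.insert(I))
      continue; // already visited
    for (Value *Op : I->operands())
      if (auto *OpI = dyn_cast<Instruction>(Op))
        Worklist.push_back(OpI);
  }
}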
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition:Instructions.h:1901
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition:Instructions.h:1941
llvm::SmallDenseMap
Definition:DenseMap.h:883
llvm::SmallDenseSet
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition:DenseSet.h:298
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition:SmallPtrSet.h:94
llvm::SmallPtrSetImplBase::clear
void clear()
Definition:SmallPtrSet.h:97
llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition:SmallPtrSet.h:93
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition:SmallPtrSet.h:363
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition:SmallPtrSet.h:401
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition:SmallPtrSet.h:452
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition:SmallPtrSet.h:384
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition:SmallPtrSet.h:519
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition:SetVector.h:370
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition:SmallSet.h:132
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition:SmallSet.h:175
llvm::SmallSet::erase
bool erase(const T &V)
Definition:SmallSet.h:193
llvm::SmallSet::clear
void clear()
Definition:SmallSet.h:204
llvm::SmallSet::contains
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition:SmallSet.h:222
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition:SmallSet.h:181
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition:SmallVector.h:673
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition:SmallVector.h:663
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition:SmallVector.h:737
llvm::SmallVectorImpl::clear
void clear()
Definition:SmallVector.h:610
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition:SmallVector.h:577
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition:SmallVector.h:638
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVectorTemplateCommon::end
iterator end()
Definition:SmallVector.h:269
llvm::SmallVectorTemplateCommon::front
reference front()
Definition:SmallVector.h:299
llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition:SmallVector.h:267
llvm::SmallVectorTemplateCommon::back
reference back()
Definition:SmallVector.h:308
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::StoreInst
An instruction for storing to memory.
Definition:Instructions.h:292
llvm::StoreInst::getPointerOperandIndex
static unsigned getPointerOperandIndex()
Definition:Instructions.h:383
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition:DataLayout.h:567
llvm::StructLayout::getElementOffset
TypeSize getElementOffset(unsigned Idx) const
Definition:DataLayout.h:596
llvm::StructType
Class to represent struct types.
Definition:DerivedTypes.h:218
llvm::SwitchInst::CaseHandle
Definition:Instructions.h:3250
llvm::SwitchInst
Multiway switch.
Definition:Instructions.h:3154
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition:TargetTransformInfo.h:3194
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition:TargetLibraryInfo.h:614
llvm::TargetLibraryInfoWrapperPass
Definition:TargetLibraryInfo.h:639
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition:TargetLibraryInfo.h:280
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition:TargetLibraryInfo.h:345
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition:TargetLoweringBase.cpp:1765
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition:TargetLowering.h:1677
llvm::TargetLoweringBase::isSelectSupported
virtual bool isSelectSupported(SelectSupportKind) const
Definition:TargetLowering.h:454
llvm::TargetLoweringBase::isEqualityCmpFoldedWithSignedCmp
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
Definition:TargetLowering.h:734
llvm::TargetLoweringBase::shouldFormOverflowOp
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
Definition:TargetLowering.h:3354
llvm::TargetLoweringBase::isMaskAndCmp0FoldingBeneficial
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
Definition:TargetLowering.h:756
llvm::TargetLoweringBase::isExtLoad
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
Definition:TargetLowering.h:3033
llvm::TargetLoweringBase::isSExtCheaperThanZExt
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
Definition:TargetLowering.h:3085
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition:TargetLowering.h:364
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition:TargetLowering.h:3066
llvm::TargetLoweringBase::TypePromoteInteger
@ TypePromoteInteger
Definition:TargetLowering.h:211
llvm::TargetLoweringBase::enableExtLdPromotion
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
Definition:TargetLowering.h:946
llvm::TargetLoweringBase::isCheapToSpeculateCttz
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition:TargetLowering.h:707
llvm::TargetLoweringBase::isJumpExpensive
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
Definition:TargetLowering.h:617
llvm::TargetLoweringBase::hasExtractBitsInsn
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
Definition:TargetLowering.h:513
llvm::TargetLoweringBase::SelectSupportKind
SelectSupportKind
Enum that describes what type of support for selects the target has.
Definition:TargetLowering.h:241
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition:TargetLowering.h:1918
llvm::TargetLoweringBase::isSlowDivBypassed
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
Definition:TargetLowering.h:604
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition:TargetLowering.h:2972
llvm::TargetLoweringBase::getTypeToTransformTo
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition:TargetLowering.h:1156
llvm::TargetLoweringBase::getPreferredSwitchConditionType
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
Definition:TargetLoweringBase.cpp:1652
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
Definition:TargetLowering.h:1630
llvm::TargetLoweringBase::canCombineStoreAndExtract
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
Definition:TargetLowering.h:951
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition:TargetLowering.h:1093
llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Definition:TargetLoweringBase.cpp:918
llvm::TargetLoweringBase::shouldConsiderGEPOffsetSplit
virtual bool shouldConsiderGEPOffsetSplit() const
Definition:TargetLowering.h:3380
llvm::TargetLoweringBase::hasMultipleConditionRegisters
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
Definition:TargetLowering.h:508
llvm::TargetLoweringBase::isExtFree
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
Definition:TargetLowering.h:3008
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition:TargetLowering.h:1339
llvm::TargetLoweringBase::isPredictableSelectExpensive
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
Definition:TargetLowering.h:657
llvm::TargetLoweringBase::isMultiStoresCheaperThanBitsMerge
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
Definition:TargetLowering.h:742
llvm::TargetLoweringBase::getAddrModeArguments
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
Definition:TargetLowering.h:2802
llvm::TargetLoweringBase::isLoadExtLegal
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Definition:TargetLowering.h:1467
llvm::TargetLoweringBase::getBypassSlowDivWidths
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
Definition:TargetLowering.h:608
llvm::TargetLoweringBase::isCheapToSpeculateCtlz
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition:TargetLowering.h:712
llvm::TargetLoweringBase::useSoftFloat
virtual bool useSoftFloat() const
Definition:TargetLowering.h:366
llvm::TargetLoweringBase::getPreferredLargeGEPBaseOffset
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
Definition:TargetLowering.h:2846
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition:TargetLowering.h:1143
llvm::TargetLoweringBase::shouldAlignPointerArgs
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Definition:TargetLowering.h:2114
llvm::TargetLoweringBase::shouldConvertSplatType
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Definition:TargetLowering.h:2884
llvm::TargetLoweringBase::addressingModeSupportsTLS
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
Definition:TargetLowering.h:2841
llvm::TargetLoweringBase::shouldConvertPhiType
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
Definition:TargetLowering.h:2891
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition:TargetLowering.h:3223
llvm::TargetLoweringBase::preferZeroCompareBranch
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
Definition:TargetLowering.h:738
llvm::TargetLoweringBase::isLegalAddressingMode
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition:TargetLoweringBase.cpp:1911
llvm::TargetLoweringBase::optimizeExtendOrTruncateConversion
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
Definition:TargetLowering.h:3096
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition:TargetLowering.h:3780
llvm::TargetLowering::C_Memory
@ C_Memory
Definition:TargetLowering.h:4953
llvm::TargetLowering::AsmOperandInfoVector
std::vector< AsmOperandInfo > AsmOperandInfoVector
Definition:TargetLowering.h:5008
llvm::TargetLowering::ExpandInlineAsm
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
Definition:TargetLowering.h:4946
llvm::TargetLowering::ParseConstraints
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
Definition:TargetLowering.cpp:5731
llvm::TargetLowering::ComputeConstraintToUse
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
Definition:TargetLowering.cpp:6088
llvm::TargetLowering::mayBeEmittedAsTailCall
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Definition:TargetLowering.h:4814
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition:TargetMachine.h:77
llvm::TargetMachine::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Definition:TargetMachine.h:330
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition:TargetPassConfig.h:85
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition:TargetRegisterInfo.h:235
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition:TargetSubtargetInfo.h:63
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition:TargetSubtargetInfo.h:129
llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition:TargetSubtargetInfo.h:101
llvm::TargetSubtargetInfo::addrSinkUsingGEPs
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Definition:TargetSubtargetInfo.h:301
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition:TargetTransformInfo.h:3250
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition:TargetTransformInfo.h:212
llvm::TargetTransformInfo::isExpensiveToSpeculativelyExecute
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
Definition:TargetTransformInfo.cpp:703
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition:TargetTransformInfo.h:263
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition:TargetTransformInfo.h:264
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition:TargetTransformInfo.h:267
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition:TargetTransformInfo.cpp:940
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
Definition:TargetTransformInfo.cpp:728
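As a sketch of how a transform might consult the cost hooks above before materializing a constant: the TCC_Basic threshold and the TCK_SizeAndLatency cost kind below are illustrative choices, not CodeGenPrepare's actual policy.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Ask the target whether materializing Imm as a value of type Ty costs no
// more than a basic instruction (TCC_Basic is "one typical add").
static bool isCheapImmediate(const TargetTransformInfo &TTI, const APInt &Imm,
                             Type *Ty) {
  InstructionCost Cost =
      TTI.getIntImmCost(Imm, Ty, TargetTransformInfo::TCK_SizeAndLatency);
  return Cost <= TargetTransformInfo::TCC_Basic;
}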
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition:TargetTransformInfo.cpp:817
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition:TargetTransformInfo.cpp:279
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition:TargetTransformInfo.h:290
llvm::TargetTransformInfo::isVectorShiftByScalarCheap
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
Definition:TargetTransformInfo.cpp:1434
llvm::TargetTransformInfo::isProfitableToSinkOperands
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
Definition:TargetTransformInfo.cpp:1429
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
Definition:TargetTransformInfo.cpp:1079
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition:TargetTransformInfo.h:1121
llvm::TruncInst
This class represents a truncation of integer types.
Definition:Instructions.h:4503
llvm::TypeSize
Definition:TypeSize.h:334
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition:Type.h:270
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
llvm::Type::isScalableTy
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
llvm::Type::isIntOrPtrTy
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition:Type.h:252
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition:Type.h:355
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition:Constants.cpp:1859
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition:Use.h:43
llvm::User
Definition:User.h:44
llvm::User::operands
op_range operands()
Definition:User.h:288
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition:User.cpp:21
llvm::User::getOperandUse
const Use & getOperandUse(unsigned i) const
Definition:User.h:241
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition:User.h:233
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition:User.h:228
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition:User.h:250
llvm::ValueMap
See the file comment.
Definition:ValueMap.h:84
llvm::ValueMap::clear
void clear()
Definition:ValueMap.h:145
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition:Value.h:740
llvm::Value::user_begin
user_iterator user_begin()
Definition:Value.h:397
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition:Value.cpp:377
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition:Value.h:434
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition:Value.cpp:534
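The entry above is usually paired with eraseFromParent when an instruction has been fully replaced by a simpler value; a tiny sketch of that pattern:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Replace every use of Old with New, then remove Old from its basic block.
// The caller must ensure New is valid at (dominates) every use of Old.
static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New);
  Old->eraseFromParent();
}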
llvm::Value::users
iterator_range< user_iterator > users()
Definition:Value.h:421
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition:Value.cpp:927
llvm::Value::isUsedInBasicBlock
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition:Value.cpp:234
llvm::Value::hasNUsesOrMore
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition:Value.cpp:153
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition:Value.cpp:694
llvm::Value::use_empty
bool use_empty() const
Definition:Value.h:344
llvm::Value::user_end
user_iterator user_end()
Definition:Value.h:405
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition:Value.cpp:1075
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition:Value.cpp:255
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition:Value.h:376
llvm::Value::mutateType
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition:Value.h:819
llvm::Value::user_iterator
user_iterator_impl< User > user_iterator
Definition:Value.h:390
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition:Value.cpp:309
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition:Value.cpp:383
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition:AsmWriter.cpp:5304
llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition:ValueHandle.h:204
llvm::WeakTrackingVH::pointsToAliveValue
bool pointsToAliveValue() const
Definition:ValueHandle.h:224
llvm::ZExtInst
This class represents zero extension of integer types.
Definition:Instructions.h:4569
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition:CommandLine.h:399
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition:TypeSize.h:202
llvm::details::FixedOrScalableQuantity::isNonZero
constexpr bool isNonZero() const
Definition:TypeSize.h:158
llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition:TypeSize.h:171
llvm::generic_gep_type_iterator
Definition:GetElementPtrTypeIterator.h:31
llvm::generic_gep_type_iterator::getStructTypeOrNull
StructType * getStructTypeOrNull() const
Definition:GetElementPtrTypeIterator.h:166
llvm::generic_gep_type_iterator::getSequentialElementStride
TypeSize getSequentialElementStride(const DataLayout &DL) const
Definition:GetElementPtrTypeIterator.h:154
llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition:ilist_node.h:32
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition:ilist_node.h:132
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition:ilist_node.h:353
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition:raw_ostream.h:52
uint32_t
uint64_t
unsigned
Analysis.h
DebugInfo.h
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
TargetMachine.h
false
Definition:StackSlotColoring.cpp:193
llvm::AArch64PACKey::IB
@ IB
Definition:AArch64BaseInfo.h:876
llvm::AMDGPUISD::BFI
@ BFI
Definition:AMDGPUISelLowering.h:496
llvm::ARM_MB::ST
@ ST
Definition:ARMBaseInfo.h:73
llvm::ARM::ProfileKind::M
@ M
llvm::COFF::Entry
@ Entry
Definition:COFF.h:844
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition:CallingConv.h:24
llvm::HexagonMCInstrInfo::getAddrMode
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
Definition:HexagonMCInstrInfo.cpp:248
llvm::ISD::UADDO
@ UADDO
Definition:ISDOpcodes.h:331
llvm::ISD::USUBO
@ USUBO
Definition:ISDOpcodes.h:335
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition:ISDOpcodes.h:748
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition:ISDOpcodes.h:1590
llvm::M68kBeads::Term
@ Term
Definition:M68kBaseInfo.h:116
llvm::M68k::MemAddrModeKind::U
@ U
llvm::M68k::MemAddrModeKind::V
@ V
llvm::M68k::MemAddrModeKind::f
@ f
llvm::M68k::MemAddrModeKind::L
@ L
llvm::MipsISD::Ext
@ Ext
Definition:MipsISelLowering.h:157
llvm::NVPTXISD::Dummy
@ Dummy
Definition:NVPTXISelLowering.h:70
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition:NVPTX.h:163
llvm::PatternMatch
Definition:PatternMatch.h:47
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition:PatternMatch.h:524
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1102
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition:PatternMatch.h:100
llvm::PatternMatch::m_NUWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1314
llvm::PatternMatch::m_URem
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1198
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition:PatternMatch.h:165
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition:PatternMatch.h:982
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition:PatternMatch.h:49
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition:PatternMatch.h:826
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition:PatternMatch.h:885
llvm::PatternMatch::m_Shr
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
Definition:PatternMatch.h:1525
llvm::PatternMatch::m_c_NUWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1323
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition:PatternMatch.h:168
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition:PatternMatch.h:592
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition:PatternMatch.h:1799
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition:PatternMatch.h:599
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition:PatternMatch.h:67
llvm::PatternMatch::m_LogicalOr
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Definition:PatternMatch.h:3099
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition:PatternMatch.h:1911
llvm::PatternMatch::m_ImmConstant
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition:PatternMatch.h:864
llvm::PatternMatch::m_ZExt
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition:PatternMatch.h:2107
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition:PatternMatch.h:105
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition:PatternMatch.h:2220
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition:PatternMatch.h:299
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition:PatternMatch.h:92
llvm::PatternMatch::m_NSWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1281
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Definition:PatternMatch.h:1627
llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1234
llvm::PatternMatch::m_UAddWithOverflow
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
Definition:PatternMatch.h:2548
llvm::PatternMatch::m_LogicalAnd
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
Definition:PatternMatch.h:3081
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition:PatternMatch.h:152
llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition:PatternMatch.h:2805
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition:PatternMatch.h:1829
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1114
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition:PatternMatch.h:239
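A small sketch of how the PatternMatch combinators listed above compose. The specific "(X << C) | Y" shape is an arbitrary example, not a pattern this file necessarily looks for.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace PatternMatch;

// Recognize "(X << C) | Y" with the or's operands in either order, binding
// X, Y and the constant shift amount C on success.
static bool matchShlOrPattern(Value *V, Value *&X, Value *&Y,
                              const APInt *&C) {
  return match(V, m_c_Or(m_Shl(m_Value(X), m_APInt(C)), m_Value(Y)));
}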
llvm::SIEncodingFamily::VI
@ VI
Definition:SIDefines.h:37
llvm::SIEncodingFamily::SI
@ SI
Definition:SIDefines.h:36
llvm::SPII::Load
@ Load
Definition:SparcInstrInfo.h:32
llvm::ScaledNumbers::compare
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition:ScaledNumber.h:252
llvm::SystemZISD::TM
@ TM
Definition:SystemZISelLowering.h:66
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::dwarf::Index
Index
Definition:Dwarf.h:882
llvm::dwarf::Constants
Constants
Definition:Dwarf.h:842
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition:PointerTypeAnalysis.cpp:191
llvm::logicalview::LVAttributeKind::Location
@ Location
llvm::lowertypetests::DropTestKind::Assume
@ Assume
Do not drop type tests (default).
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::msgpack::Type::Map
@ Map
llvm::objcopy::AdjustKind::Set
@ Set
llvm::pdb::PDB_SymType::Callee
@ Callee
llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition:RDFGraph.h:390
llvm::sampleprof::Base
@ Base
Definition:Discriminator.h:58
llvm::sys::path::begin
const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition:Path.cpp:226
llvm::sys::path::end
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition:Path.cpp:235
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition:STLExtras.h:329
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition:SparseBitVector.h:877
llvm::Offset
@ Offset
Definition:DWP.cpp:480
llvm::RemoveRedundantDbgInstrs
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
Definition:BasicBlockUtils.cpp:685
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1739
llvm::PseudoProbeType::Block
@ Block
llvm::popcount
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition:bit.h:385
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition:STLExtras.h:1697
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition:Local.cpp:546
llvm::ConstantFoldTerminator
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition:Local.cpp:136
llvm::Depth
@ Depth
Definition:SIMachineScheduler.h:36
llvm::pred_end
auto pred_end(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1385
llvm::operator*
APInt operator*(APInt a, uint64_t RHS)
Definition:APInt.h:2204
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition:Alignment.h:145
llvm::LibFunc
LibFunc
Definition:TargetLibraryInfo.h:68
llvm::salvageDebugInfo
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition:Utils.cpp:1683
llvm::successors
auto successors(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1376
llvm::FoldReturnIntoUncondBranch
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
Definition:BasicBlockUtils.cpp:1551
llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition:APInt.h:2082
llvm::DiagnosticPredicateTy::Match
@ Match
llvm::SplitBlockAndInsertIfElse
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
Definition:BasicBlockUtils.cpp:1622
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition:STLExtras.h:2115
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition:MachineSizeOpts.cpp:27
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition:STLExtras.h:657
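A sketch of the erase-while-iterating idiom that make_early_inc_range enables: the iterator is advanced before the loop body runs, so erasing the current instruction is safe. isInstructionTriviallyDead is assumed to be available from llvm/Transforms/Utils/Local.h.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Delete trivially dead instructions from BB, returning true if anything
// changed. Only the current instruction is erased inside the loop body.
static bool dropDeadInstructions(BasicBlock &BB) {
  bool Changed = false;
  for (Instruction &I : make_early_inc_range(BB)) {
    if (isInstructionTriviallyDead(&I)) {
      I.eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}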
llvm::DeleteDeadBlock
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Definition:BasicBlockUtils.cpp:96
llvm::unique
auto unique(Range &&R, Predicate P)
Definition:STLExtras.h:2055
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition:VectorUtils.cpp:312
llvm::ExpandVariadicsMode::Optimize
@ Optimize
llvm::initializeCodeGenPrepareLegacyPassPass
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
llvm::hasBranchWeightOrigin
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
Definition:ProfDataUtils.cpp:122
llvm::findDbgValues
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition:DebugInfo.cpp:155
llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition:AddressRanges.h:153
llvm::SplitIndirectBrCriticalEdges
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Definition:BreakCriticalEdges.cpp:338
llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Definition:GCNRegPressure.cpp:227
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Definition:InstructionSimplify.cpp:7234
llvm::simplifyAddInst
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Definition:InstructionSimplify.cpp:656
llvm::getKnownAlignment
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition:Local.h:242
llvm::erase
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition:STLExtras.h:2107
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1746
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition:VectorUtils.cpp:327
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition:BasicBlockUtils.cpp:164
llvm::replaceAndRecursivelySimplify
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
Definition:InstructionSimplify.cpp:7310
llvm::HexPrintStyle::Upper
@ Upper
llvm::reverse
auto reverse(ContainerTy &&C)
Definition:STLExtras.h:420
llvm::recognizeBSwapOrBitReverseIdiom
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition:Local.cpp:4096
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition:STLExtras.h:1664
llvm::FPClassTest
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
Definition:FloatingPointMode.h:239
llvm::fcInf
@ fcInf
Definition:FloatingPointMode.h:254
llvm::fcNan
@ fcNan
Definition:FloatingPointMode.h:253
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition:BasicBlockUtils.cpp:1635
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition:Debug.cpp:163
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1753
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition:ValueTracking.cpp:7043
llvm::createCodeGenPrepareLegacyPass
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
Definition:CodeGenPrepare.cpp:541
llvm::getFCmpCondCode
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition:Analysis.cpp:199
llvm::VerifyLoopInfo
bool VerifyLoopInfo
Enable verification of loop info.
Definition:LoopInfo.cpp:51
llvm::isKnownNonZero
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
Definition:ValueTracking.cpp:3487
llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
llvm::attributesPermitTailCall
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute misma...
Definition:Analysis.cpp:584
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
Definition:BasicBlockUtils.cpp:180
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::RecurKind::Add
@ Add
Sum of integers.
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition:STLExtras.h:1938
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition:APFixedPoint.h:303
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition:ValueTracking.cpp:7841
llvm::BitWidth
constexpr unsigned BitWidth
Definition:BitmaskEnum.h:217
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition:ProfDataUtils.cpp:170
llvm::pred_begin
auto pred_begin(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1383
llvm::bypassSlowDivision
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition:GetElementPtrTypeIterator.h:173
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition:STLExtras.h:2099
llvm::fcmpToClassTest
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
Definition:ValueTracking.cpp:4519
llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition:MachineBasicBlock.h:1377
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition:STLExtras.h:1903
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition:Alignment.h:212
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition:CFG.h:118
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions are moved to a new block.
Definition:BasicBlockUtils.cpp:1609
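A hedged sketch, assuming a Value *Cond and an insertion-point instruction I from the surrounding code:
  // Materialize "if (Cond) { ... }" before I; the returned instruction is the
  // terminator of the new then-block and makes a convenient insertion point.
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, I->getIterator(), /*Unreachable=*/false);
  IRBuilder<> Builder(ThenTerm);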
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
Definition:BasicBlockUtils.cpp:762
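For illustration, splitting a critical edge so code can be placed on the Pred->Succ path only (DT and LI assumed in scope):
  BasicBlock *EdgeBB = SplitEdge(Pred, Succ, &DT, &LI);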
llvm::filterDbgVars
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Definition:DebugProgramInstruction.h:555
llvm::simplifyURemInst
Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
Definition:InstructionSimplify.cpp:1260
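A sketch, assuming Rem is a urem BinaryOperator and DL a DataLayout:
  // e.g. "x urem 1" folds to 0 without creating new instructions.
  if (Value *V = simplifyURemInst(Rem->getOperand(0), Rem->getOperand(1),
                                  SimplifyQuery(DL)))
    Rem->replaceAllUsesWith(V);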
llvm::getCGPassBuilderOption
CGPassBuilderOption getCGPassBuilderOption()
Definition:TargetPassConfig.cpp:476
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860
raw_ostream.h
NC
#define NC
Definition:regutils.h:42
NodeList
Definition:MicrosoftDemangle.cpp:38
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39
llvm::EVT
Extended Value Type.
Definition:ValueTypes.h:35
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition:ValueTypes.h:279
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition:ValueTypes.h:295
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition:ValueTypes.h:368
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition:ValueTypes.cpp:289
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition:ValueTypes.h:311
llvm::EVT::isRound
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition:ValueTypes.h:243
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition:ValueTypes.h:152
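A small sketch of how these EVT queries combine when reasoning about a cast (Src and Dst are placeholder Values, and the widening helper is hypothetical):
  EVT SrcVT = EVT::getEVT(Src->getType());
  EVT DstVT = EVT::getEVT(Dst->getType());
  if (SrcVT.isInteger() && DstVT.isInteger() && SrcVT.bitsLT(DstVT))
    // Src is narrower; DstVT.getSizeInBits() gives the widened width in bits.
    widenTo(DstVT.getSizeInBits());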
llvm::ExtAddrMode
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
Definition:TargetInstrInfo.h:93
llvm::ExtAddrMode::BaseReg
Register BaseReg
Definition:TargetInstrInfo.h:100
llvm::ExtAddrMode::ScaledReg
Register ScaledReg
Definition:TargetInstrInfo.h:101
llvm::ExtAddrMode::ExtAddrMode
ExtAddrMode()=default
llvm::ExtAddrMode::Scale
int64_t Scale
Definition:TargetInstrInfo.h:102
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition:Alignment.h:117
llvm::OptimizedStructLayoutField
A field in a structure.
Definition:OptimizedStructLayout.h:45
llvm::PatternMatch::m_ZeroMask
Definition:PatternMatch.h:1868
llvm::SimplifyQuery
Definition:SimplifyQuery.h:70
llvm::SimplifyQuery::DL
const DataLayout & DL
Definition:SimplifyQuery.h:71
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
Definition:TargetLowering.h:2816
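A hedged sketch of querying the target about one candidate mode (the constants are illustrative; TLI, DL and AccessTy are assumed from context, and the folding helper is hypothetical):
  TargetLowering::AddrMode AM;
  AM.HasBaseReg = true; // BaseReg
  AM.Scale = 4;         // + 4 * ScaleReg
  AM.BaseOffs = 16;     // + 16
  if (TLI->isLegalAddressingMode(DL, AM, AccessTy, /*AddrSpace=*/0))
    foldIntoMemoryOp(AM); // sink the address computation into the access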
llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition:TargetLowering.h:4977
llvm::TargetTransformInfo::OperandValueInfo
Definition:TargetTransformInfo.h:1135
llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition:TargetTransformInfo.h:1136
llvm::cl::desc
Definition:CommandLine.h:409
