//===- CodeGenPrepare.cpp - Prepare a function for code generation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/Config/llvm-config.h"
#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumMemoryInstsPhiCreated,
          "Number of phis created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumMemoryInstsSelectCreated,
          "Number of select created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
          "Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

    cl::desc("Disable GC optimizations in CodeGenPrepare"));

    cl::desc("Disable select to branch conversion."));

    cl::desc("Address sinking in CGP using GEPs."));

    cl::desc("Enable sinking and/cmp into branches."));

    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

    cl::desc("Disable protection against removing loop preheaders"));

    cl::desc("Use profile info to add section prefix for hot/cold functions"));

    "profile-unknown-in-special-section", cl::Hidden,
    cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
             "profile, we cannot tell the function is cold for sure because "
             "it may be a function newly added without ever being sampled. "
             "With the flag enabled, compiler can put such profile unknown "
             "functions into a special section, so runtime system can choose "
             "to handle it in a different way than .text section, to save "
             "RAM for example. "));

    cl::desc("Use the basic-block-sections profile to determine the text "
             "section prefix for hot functions. Functions with "
             "basic-block-sections profile will be placed in `.text.hot` "
             "regardless of their FDO profile info. Other functions won't be "
             "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
             "alone."));

    cl::desc("Skip merging empty blocks if (frequency of empty block) / "
             "(frequency of destination block) is greater than this ratio"));

    cl::desc("Force store splitting no matter what the target query says."));

    cl::desc("Enable merging of redundant sexts when one is dominating "
             "the other."));

    cl::desc("Disables combining addressing modes with different parts "
             "in optimizeMemoryInst."));

    cl::desc("Allow creation of Phis in Address sinking."));

    cl::desc("Allow creation of selects in Address sinking."));

    cl::desc("Allow combining of BaseReg field in Address sinking."));

    cl::desc("Allow combining of BaseGV field in Address sinking."));

    cl::desc("Allow combining of BaseOffs field in Address sinking."));

    cl::desc("Allow combining of ScaledReg field in Address sinking."));

    cl::desc("Enable splitting large offset of GEP."));

    cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));

    cl::desc("Enable BFI update verification for CodeGenPrepare."));

    cl::desc("Enable converting phi types in CodeGenPrepare"));

    cl::desc("Least BB number of huge function."));

    cl::desc("Max number of address users to look at"));

    cl::desc("Disable elimination of dead PHI nodes."));
  enum ExtType {
    ZeroExtension, // Zero extension has been seen.
    SignExtension, // Sign extension has been seen.
    BothExtension  // This extension type is used if we saw sext after
                   // ZeroExtension had been set, or if we saw zext after
                   // SignExtension had been set. It makes the type
                   // information of a promoted instruction invalid.
  };

  enum class ModifyDT {
    NotModifyDT, // Does not modify any dominator tree.
    ModifyBBDT,  // Modifies the basic-block dominator tree.
    ModifyInstDT // Modifies the instruction order within a basic block.
                 // This usually means we move/delete/insert instructions
                 // in a basic block, so we should re-iterate the
                 // instructions in such a basic block.
  };

  class TypePromotionTransaction;

  friend class CodeGenPrepareLegacyPass;
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;

  /// As we scan instructions optimizing them, this is the next instruction
  /// to optimize. Transforms that can invalidate this should update it.

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address. The usage of WeakTrackingVH
  /// enables SunkAddrs to be treated as a cache whose entries can be
  /// invalidated if a sunken address computation has been erased.

  /// Keeps track of all instructions inserted for the current function.
  SetOfInstrs InsertedInsts;

  /// Keeps track of the type of the related instructions before their
  /// promotion for the current function.
  InstrToOrigTy PromotedInsts;

  /// Keep track of instructions removed during promotion.
  SetOfInstrs RemovedInsts;

  /// Keep track of sext chains based on their initial value.

  /// Keep track of GEPs accessing the same data structures such as structs or
  /// arrays that are candidates to be split later because of their large
  /// offsets.

  /// Keep track of new GEP bases after splitting GEPs that have large offsets.

  /// Map serial numbers to large-offset GEPs.

  /// Keep track of SExts promoted.
  ValueToSExts ValToSExtendedUses;

  /// True if the function has the OptSize attribute.

  /// DataLayout for the Function being processed.

  /// Building the dominator tree can be expensive, so we only build it
  /// lazily and update it when required.
  std::unique_ptr<DominatorTree> DT;

  /// If we encounter a huge function, we need to limit the build time.
  bool IsHugeFunc = false;

  /// FreshBBs is like a worklist; it collects the updated BBs which need
  /// to be optimized again.
  /// Note: To limit build time in this pass, when a BB is updated we only
  /// insert it into FreshBBs for huge functions.

  void releaseMemory() {
    // Clear per-function information.
    InsertedInsts.clear();
    PromotedInsts.clear();
  }
  template <typename F>
  void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
    // Substituting can cause recursive simplifications, which can invalidate
    // our iterator.  Use a WeakTrackingVH to hold onto it in case this
    // happens.
    Value *CurValue = &*CurInstIterator;
    WeakTrackingVH IterHandle(CurValue);

    f();

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurValue) {
      CurInstIterator = BB->begin();
    }
  }

  // Get the DominatorTree, building if necessary.
  DominatorTree &getDT(Function &F) {
    if (!DT)
      DT = std::make_unique<DominatorTree>(F);
    return *DT;
  }
  void removeAllAssertingVHReferences(Value *V);
  bool eliminateMostlyEmptyBlocks(Function &F);
  void eliminateMostlyEmptyBlock(BasicBlock *BB);
  bool optimizeInlineAsmInst(CallInst *CS);
  bool optimizeLoadExt(LoadInst *Load);
  bool optimizeSwitchPhiConstants(SwitchInst *SI);
  bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
  bool tryToPromoteExts(TypePromotionTransaction &TPT,
                        const SmallVectorImpl<Instruction *> &Exts,
                        SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                        unsigned CreatedInstsCost = 0);
  bool splitLargeGEPOffsets();
  bool performAddressTypePromotion(
      Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
      bool HasPromoted, TypePromotionTransaction &TPT,
      SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
  bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
  bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);

  static char ID; // Pass identification, replacement for typeid

  // FIXME: When we can selectively preserve passes, preserve the domtree.

} // end anonymous namespace

char CodeGenPrepareLegacyPass::ID = 0;
bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
  CodeGenPrepare CGP(TM);
  CGP.DL = &F.getDataLayout();
  CGP.SubtargetInfo = TM->getSubtargetImpl(F);
  CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
  CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
  CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *BBSPRWP =
      getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
  CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;

                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
  return new CodeGenPrepareLegacyPass();
}
  CodeGenPrepare CGP(TM);
  bool Changed = CGP.run(F, AM);

  DL = &F.getDataLayout();
  SubtargetInfo = TM->getSubtargetImpl(F);

  BBSectionsProfileReader =

  bool EverMadeChange = false;

  OptSize = F.hasOptSize();
  // Use the basic-block-sections profile to promote hot functions to .text.hot
  // if requested.
    F.setSectionPrefix("hot");

    // The hot attribute overwrites profile count based hotness while profile
    // counts based hotness overwrites the cold attribute.
    // This is a conservative behavior.
    if (F.hasFnAttribute(Attribute::Hot) ||
        PSI->isFunctionHotInCallGraph(&F, *BFI))
      F.setSectionPrefix("hot");
    // If PSI shows this function is not hot, we will place the function
    // into the unlikely section if (1) PSI shows this is a cold function, or
    // (2) the function has the cold attribute.
    else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
             F.hasFnAttribute(Attribute::Cold))
      F.setSectionPrefix("unlikely");
             PSI->isFunctionHotnessUnknown(F))
      F.setSectionPrefix("unknown");
  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
    while (BB != nullptr) {
      // bypassSlowDivision may create new BBs, but we don't want to reapply
      // the optimization to those blocks.

  // Get rid of @llvm.assume builtins before attempting to eliminate empty
  // blocks, since there might be blocks that only contain @llvm.assume calls
  // (plus arguments that we can get rid of).
  EverMadeChange |= eliminateAssumptions(F);

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= eliminateMostlyEmptyBlocks(F);

  ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
    EverMadeChange |= splitBranchCondition(F, ModifiedDT);

  // Split some critical edges where one of the sources is an indirect branch,
  // to help generate sane code for PHIs involving such edges.

  // If we are optimizing a huge function, we need to consider the build time,
  // because the basic algorithm's complexity is nearly O(N!).

  // Transformations above may invalidate the dominator tree and/or loop info.
  LI->analyze(getDT(F));
  bool MadeChange = true;
  bool FuncIterated = false;

      if (FuncIterated && !FreshBBs.contains(&BB))
        continue;

      ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;

      if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
        break;

      MadeChange |= Changed;

      // If the BB is updated, it may still have a chance to be optimized.
      // This usually happens during sink optimizations.
      // For example:
      //
      //     %and = and i32 %a, 4
      //     %cmp = icmp eq i32 %and, 0
      //
      // If the %cmp sinks to another BB, the %and will then have a chance to
      // sink as well.

      // For small/normal functions, we restart BB iteration if the dominator
      // tree of the Function was changed.
      if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
        break;

    // We have iterated over all the BBs in the function (this only applies to
    // huge functions).
    FuncIterated = IsHugeFunc;

    MadeChange |= mergeSExts(F);
    if (!LargeOffsetGEPMap.empty())
      MadeChange |= splitLargeGEPOffsets();
    MadeChange |= optimizePhiTypes(F);

    eliminateFallThrough(F, DT.get());

    LI->verify(getDT(F));

    // Really free removed instructions during promotion.

  EverMadeChange |= MadeChange;
  SeenChainsForSExt.clear();
  ValToSExtendedUses.clear();
  RemovedInsts.clear();
  LargeOffsetGEPMap.clear();
  LargeOffsetGEPID.clear();

    // Use a set vector to get deterministic iteration order. The order the
    // blocks are removed may affect whether or not PHI nodes in successors
    // are removed.

    // Delete the dead blocks and any of their dead successors.
    MadeChange |= !WorkList.empty();
    while (!WorkList.empty()) {

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= eliminateFallThrough(F);

    EverMadeChange |= MadeChange;
      if (auto *SP = dyn_cast<GCStatepointInst>(&I))
        Statepoints.push_back(SP);
    for (auto &I : Statepoints)
      EverMadeChange |= simplifyOffsetableRelocate(*I);

  // Do this last to clean up use-before-def scenarios introduced by other
  // preparatory transforms.
  EverMadeChange |= placeDbgValues(F);
  EverMadeChange |= placePseudoProbes(F);

  return EverMadeChange;
}
777bool CodeGenPrepare::eliminateAssumptions(
Function &
F) {
778bool MadeChange =
false;
780 CurInstIterator = BB.begin();
781while (CurInstIterator != BB.end()) {
783if (
auto *Assume = dyn_cast<AssumeInst>(
I)) {
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
/// An instruction is about to be deleted, so remove all references to it in
/// our GEP-tracking data structures.
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.
erase(V);
801 NewGEPBases.
erase(V);
803autoGEP = dyn_cast<GetElementPtrInst>(V);
809auto VecI = LargeOffsetGEPMap.
find(
GEP->getPointerOperand());
810if (VecI == LargeOffsetGEPMap.
end())
813auto &GEPVector = VecI->second;
816if (GEPVector.empty())
817 LargeOffsetGEPMap.
erase(VecI);
820// Verify BFI has been updated correctly by recomputing BFI and comparing them. 826 NewBFI.verifyMatch(*BFI);
829/// Merge basic blocks which are connected by a single edge, where one of the 830/// basic blocks has a single successor pointing to the other basic block, 831/// which has a single predecessor. 834// Scan all of the blocks in the function, except for the entry block. 835// Use a temporary array to avoid iterator being invalidated when 843auto *BB = cast_or_null<BasicBlock>(
Block);
846// If the destination block has a single pred, then this is a trivial 847// edge, just collapse it. 850// Don't merge if BB's address is taken. 851if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
854// Make an effort to skip unreachable blocks. 859if (Term && !
Term->isConditional()) {
863// Merge BB into SinglePred and delete it. 866/* PredecessorWithTwoSuccessors */false, DT);
870// Update FreshBBs to optimize the merged BB. 871 FreshBBs.
insert(SinglePred);
877// (Repeatedly) merging blocks into their predecessors can create redundant 879for (
constauto &Pred : Preds)
880if (
auto *BB = cast_or_null<BasicBlock>(Pred))
886/// Find a destination block from BB if BB is mergeable empty block. 888// If this block doesn't end with an uncond branch, ignore it. 893// If the instruction before the branch (skipping debug info) isn't a phi 894// node, then other stuff is happening here. 896if (BBI != BB->
begin()) {
898while (isa<DbgInfoIntrinsic>(BBI)) {
899if (BBI == BB->
begin())
903if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
907// Do not break infinite loops. 912if (!canMergeBlocks(BB, DestBB))
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an 919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split 920/// edges in ways that are non-optimal for isel. Start by eliminating these 921/// blocks so we can split them the way we want them. 922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(
Function &
F) {
925while (!LoopList.empty()) {
926Loop *
L = LoopList.pop_back_val();
929 Preheaders.
insert(Preheader);
932bool MadeChange =
false;
933// Copy blocks into a temporary array to avoid iterator invalidation issues 935// Note that this intentionally skips the entry block. 938// Delete phi nodes that could block deleting other empty blocks. 948BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.
count(BB)))
953 eliminateMostlyEmptyBlock(BB);
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(
BasicBlock *BB,
  // Do not delete loop preheaders if doing so would create a critical edge.
  // Loop preheaders can be good locations to spill registers. If the
  // preheader is deleted and we create a critical edge, registers may be
  // spilled in the loop body instead.

  // Skip merging if the block's successor is also a successor to any callbr
  // that leads to this block.
  // FIXME: Is this really needed? Is this a correctness issue?
    if (isa<CallBrInst>(Pred->getTerminator()) &&

  // Try to skip merging if the unique predecessor of BB is terminated by a
  // switch or indirect branch instruction, and BB is used as an incoming block
  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
  // to add COPY instructions in the predecessor of BB instead of BB (if it is
  // not merged). Note that the critical edge created by merging such blocks
  // won't be split in MachineSink because the jump table is not analyzable. By
  // keeping such an empty block (BB), ISel will place COPY instructions in BB,
  // not in the predecessor of BB.

  // We use a simple cost heuristic which determines that skipping merging is
  // profitable if the cost of skipping merging is less than the cost of
  // merging: Cost(skipping merging) < Cost(merging BB), where
  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
  // Cost(merging BB) is Freq(Pred) * Cost(Copy).
  // Assuming Cost(Copy) == Cost(Branch), we can simplify this to:
  //   Freq(Pred) / Freq(BB) > 2.
  // Note that if there are multiple empty blocks sharing the same incoming
  // value for the PHIs in the DestBB, we consider them together. In such a
  // case, Cost(merging BB) will be the sum of their frequencies.

  if (!isa<PHINode>(DestBB->begin()))
1012// Find all other incoming blocks from which incoming values of all PHIs in 1013// DestBB are the same as the ones from BB. 1015if (DestBBPred == BB)
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1022 SameIncomingValueBBs.
insert(DestBBPred);
  // See if all of BB's incoming values are the same as the value from Pred. In
  // this case, there is no reason to skip merging because COPYs are expected
  // to be placed in Pred already.
  if (SameIncomingValueBBs.count(Pred))
1034for (
auto *SameValueBB : SameIncomingValueBBs)
1035if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1040return !Limit || PredFreq <= *Limit;
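  // Worked example of the Freq(Pred) / Freq(BB) > 2 heuristic above (numbers
  // are made up for illustration, not taken from real profile data): if Pred
  // runs 1000 times and the empty block BB runs 400 times, the ratio is 2.5,
  // so Cost(merging) = 1000 * Cost(Copy) exceeds Cost(skipping) =
  // 400 * (Cost(Copy) + Cost(Branch)) = 800 * Cost(Copy) when
  // Cost(Copy) == Cost(Branch), and merging is skipped.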
1043/// Return true if we can merge BB into DestBB if there is a single 1044/// unconditional branch between them, and BB contains no other non-phi 1046bool CodeGenPrepare::canMergeBlocks(
constBasicBlock *BB,
1048// We only want to eliminate blocks whose phi nodes are used by phi nodes in 1049// the successor. If there are more complex condition (e.g. preheaders), 1050// don't mess around with them. 1054if (UI->
getParent() != DestBB || !isa<PHINode>(UI))
1056// If User is inside DestBB block and it is a PHINode then check 1057// incoming value. If incoming value is not from BB then this is 1058// a complex condition (e.g. preheaders) we want to avoid here. 1060if (
constPHINode *UPN = dyn_cast<PHINode>(UI))
1061for (
unsignedI = 0, E = UPN->getNumIncomingValues();
I != E; ++
I) {
1063if (
Insn &&
Insn->getParent() == BB &&
1064Insn->getParent() != UPN->getIncomingBlock(
I))
1071// If BB and DestBB contain any common predecessors, then the phi nodes in BB 1072// and DestBB may have conflicting incoming values for the block. If so, we 1073// can't merge the block. 1074constPHINode *DestBBPN = dyn_cast<PHINode>(DestBB->
begin());
1076returntrue;
// no conflict. 1078// Collect the preds of BB. 1080if (
constPHINode *BBPN = dyn_cast<PHINode>(BB->
begin())) {
1081// It is faster to get preds from a PHI than with pred_iterator. 1082for (
unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.
insert(BBPN->getIncomingBlock(i));
1088// Walk the preds of DestBB. 1091if (BBPreds.
count(Pred)) {
// Common predecessor? 1093constValue *V1 = PN.getIncomingValueForBlock(Pred);
1094constValue *
V2 = PN.getIncomingValueForBlock(BB);
1096// If V2 is a phi node in BB, look up what the mapped value will be. 1097if (
constPHINode *V2PN = dyn_cast<PHINode>(V2))
1098if (V2PN->getParent() == BB)
1099V2 = V2PN->getIncomingValueForBlock(Pred);
1101// If there is a conflict, bail out. 1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs. 1115auto *OldI = dyn_cast<Instruction>(Old);
1127/// Eliminate a basic block that has only phi's and an unconditional branch in 1129void CodeGenPrepare::eliminateMostlyEmptyBlock(
BasicBlock *BB) {
1136// If the destination block has a single pred, then this is a trivial edge, 1139if (SinglePred != DestBB) {
1140assert(SinglePred == BB &&
1141"Single predecessor not the same as predecessor");
1142// Merge DestBB into SinglePred/BB and delete it. 1144// Note: BB(=SinglePred) will not be deleted on this path. 1145// DestBB(=its single successor) is the one that was deleted. 1149// Update FreshBBs to optimize the merged BB. 1150 FreshBBs.
insert(SinglePred);
1151 FreshBBs.
erase(DestBB);
1157// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 1158// to handle the new incoming edges it is about to have. 1160// Remove the incoming value for BB, and remember it. 1161Value *InVal = PN.removeIncomingValue(BB,
false);
1163// Two options: either the InVal is a phi node defined in BB or it is some 1164// value that dominates BB. 1165PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166if (InValPhi && InValPhi->
getParent() == BB) {
1167// Add all of the input values of the input PHI as inputs of this phi. 1172// Otherwise, add one instance of the dominating value for each edge that 1173// we will be adding. 1175for (
unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1179 PN.addIncoming(InVal, Pred);
1184// Preserve loop Metadata. 1190// The PHIs are now updated, change everything that refers to BB to use 1191// DestBB and remove BB. 1199// Computes a map of base pointer relocation instructions to corresponding 1200// derived pointer relocation instructions given a vector of all relocate calls 1205// Collect information in two maps: one primarily for locating the base object 1206// while filling the second map; the second map is the final structure holding 1207// a mapping between Base and corresponding Derived relocate calls 1209for (
auto *ThisRelocate : AllRelocateCalls) {
1210auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1211 ThisRelocate->getDerivedPtrIndex());
1212 RelocateIdxMap.
insert(std::make_pair(K, ThisRelocate));
1214for (
auto &Item : RelocateIdxMap) {
1215 std::pair<unsigned, unsigned> Key = Item.first;
1216if (Key.first == Key.second)
1217// Base relocation: nothing to insert 1221auto BaseKey = std::make_pair(Key.first, Key.first);
1223// We're iterating over RelocateIdxMap so we cannot modify it. 1224auto MaybeBase = RelocateIdxMap.
find(BaseKey);
1225if (MaybeBase == RelocateIdxMap.
end())
1226// TODO: We might want to insert a new base object relocate and gep off 1227// that, if there are enough derived object relocates. 1230 RelocateInstMap[MaybeBase->second].push_back(
I);
1234// Accepts a GEP and extracts the operands into a vector provided they're all 1235// small integer constants 1238for (
unsigned i = 1; i <
GEP->getNumOperands(); i++) {
1239// Only accept small constant integer operands 1240auto *
Op = dyn_cast<ConstantInt>(
GEP->getOperand(i));
1241if (!
Op ||
Op->getZExtValue() > 20)
1245for (
unsigned i = 1; i <
GEP->getNumOperands(); i++)
1250// Takes a RelocatedBase (base pointer relocation instruction) and Targets to 1251// replace, computes a replacement, and affects it. 1255bool MadeChange =
false;
1256// We must ensure the relocation of derived pointer is defined after 1257// relocation of base pointer. If we find a relocation corresponding to base 1258// defined earlier than relocation of base then we move relocation of base 1259// right before found relocation. We consider only relocation in the same 1260// basic block as relocation of base. Relocations from other basic block will 1261// be skipped by optimization and we do not care about them. 1262for (
auto R = RelocatedBase->
getParent()->getFirstInsertionPt();
1263 &*R != RelocatedBase; ++R)
1264if (
auto *RI = dyn_cast<GCRelocateInst>(R))
1267 RelocatedBase->
moveBefore(RI->getIterator());
1274"Not relocating a derived object of the original base object");
1275if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1276// A duplicate relocate call. TODO: coalesce duplicates. 1280if (RelocatedBase->
getParent() != ToReplace->getParent()) {
1281// Base and derived relocates are in different basic blocks. 1282// In this case transform is only valid when base dominates derived 1283// relocate. However it would be too expensive to check dominance 1284// for each such relocate, so we skip the whole transformation. 1289auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1290if (!Derived || Derived->getPointerOperand() !=
Base)
1297// Create a Builder and replace the target callsite with a gep 1299"Should always have one since it's not a terminator");
  // Insert after RelocatedBase
  // If gc_relocate does not match the actual type, cast it to the right type.
  // In theory, there must be a bitcast after gc_relocate if the type does not
  // match, and we should reuse it to get the derived pointer. But it could be
  // cases like this:
  //   %g1 = call coldcc i8 addrspace(1)*
  //         @llvm.experimental.gc.relocate.p1i8(...) br label %merge
  //   %g2 = call coldcc i8 addrspace(1)*
  //         @llvm.experimental.gc.relocate.p1i8(...) br label %merge
  //   %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
  //   %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
  //
  // In this case, we cannot find the bitcast anymore. So we insert a new
  // bitcast, whether or not there is already one. In this way, we can handle
  // all cases, and the extra bitcast should be optimized away in later passes.
  Value *ActualRelocatedBase = RelocatedBase;
1329 ActualRelocatedBase =
1333 Builder.
CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1336// If the newly generated derived pointer's type does not match the original 1337// derived pointer's type, cast the new derived pointer to match it. Same 1338// reasoning as above. 1339Value *ActualReplacement = Replacement;
1340if (Replacement->
getType() != ToReplace->getType()) {
1345 ToReplace->eraseFromParent();
1355// %ptr = gep %base + 15 1356// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 1357// %base' = relocate(%tok, i32 4, i32 4) 1358// %ptr' = relocate(%tok, i32 4, i32 5) 1364// %ptr = gep %base + 15 1365// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) 1366// %base' = gc.relocate(%tok, i32 4, i32 4) 1367// %ptr' = gep %base' + 15 1370bool MadeChange =
false;
1372for (
auto *U :
I.users())
1374// Collect all the relocate calls associated with a statepoint 1377// We need at least one base pointer relocation + one derived pointer 1378// relocation to mangle 1379if (AllRelocateCalls.
size() < 2)
1382// RelocateInstMap is a mapping from the base relocate instruction to the 1383// corresponding derived relocate instructions 1386if (RelocateInstMap.
empty())
1389for (
auto &Item : RelocateInstMap)
1390// Item.first is the RelocatedBase to offset against 1391// Item.second is the vector of Targets to replace 1396/// Sink the specified cast instruction into its user blocks. 1400 /// InsertedCasts - Only insert a cast in each block once. 1403bool MadeChange =
false;
1406Use &TheUse = UI.getUse();
1409// Figure out which BB this cast is used in. For PHI's this is the 1410// appropriate predecessor block. 1413 UserBB = PN->getIncomingBlock(TheUse);
1416// Preincrement use iterator so we don't invalidate it. 1419// The first insertion point of a block containing an EH pad is after the 1420// pad. If the pad is the user, we cannot sink the cast past the pad. 1424// If the block selected to receive the cast is an EH pad that does not 1425// allow non-PHI instructions before the terminator, we can't sink the 1430// If this user is in the same block as the cast, don't change the cast. 1434// If we have already inserted a cast into this block, use it. 1435CastInst *&InsertedCast = InsertedCasts[UserBB];
1440 InsertedCast = cast<CastInst>(CI->
clone());
1444// Replace a use of the cast with a use of the new cast. 1445 TheUse = InsertedCast;
1450// If we removed all uses, nuke the cast. 1460/// If the specified cast instruction is a noop copy (e.g. it's casting from 1461/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to 1462/// reduce the number of virtual registers that must be created and coalesced. 1464/// Return true if any changes are made. 1467// Sink only "cheap" (or nop) address-space casts. This is a weaker condition 1468// than sinking only nop casts, but is helpful on some platforms. 1469if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1471 ASC->getDestAddressSpace()))
1475// If this is a noop copy, 1479// This is an fp<->int conversion? 1483// If this is an extension, it will be a zero or sign extension, which 1488// If these values will be promoted, find out what they will be promoted 1489// to. This helps us consider truncates on PPC as noop copies when they 1498// If, after promotion, these are the same types, this is a noop copy. 1505// Match a simple increment by constant operation. Note that if a sub is 1506// matched, the step is negated (as if the step had been canonicalized to 1507// an add, even though we leave the instruction alone.) 1511match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1515match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1523/// If given \p PN is an inductive variable with value IVInc coming from the 1524/// backedge, and on each iteration it gets increased by Step, return pair 1525/// <IVInc, Step>. Otherwise, return std::nullopt. 1526static std::optional<std::pair<Instruction *, Constant *>>
1529if (!L || L->getHeader() != PN->
getParent() || !L->getLoopLatch())
1533if (!IVInc || LI->
getLoopFor(IVInc->getParent()) != L)
1538return std::make_pair(IVInc, Step);
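// Illustrative use of the helpers above (hand-written IR, not from a test):
// for a canonical loop with
//   %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add nuw i64 %iv, 1
// the returned pair is <%iv.next, i64 1>; if the increment were
//   %iv.next = sub i64 %iv, 2
// the reported step would be -2, as if the sub had been canonicalized to an
// add, even though the instruction itself is left alone.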
1543auto *
I = dyn_cast<Instruction>(V);
1550if (
auto *PN = dyn_cast<PHINode>(
LHS))
1552return IVInc->first ==
I;
1556bool CodeGenPrepare::replaceMathCmpWithIntrinsic(
BinaryOperator *BO,
1564assert(L &&
"L should not be null after isIVIncrement()");
1565// Do not risk on moving increment into a child loop. 1566if (LI->getLoopFor(
Cmp->getParent()) != L)
1569// Finally, we need to ensure that the insert point will dominate all 1570// existing uses of the increment. 1572auto &DT = getDT(*BO->
getParent()->getParent());
1574// If we're moving up the dom tree, all uses are trivially dominated. 1575// (This is the common case for code produced by LSR.) 1578// Otherwise, special case the single use in the phi recurrence. 1581if (BO->
getParent() !=
Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1582// We used to use a dominator tree here to allow multi-block optimization. 1583// But that was problematic because: 1584// 1. It could cause a perf regression by hoisting the math op into the 1586// 2. It could cause a perf regression by creating a value that was live 1587// across multiple blocks and increasing register pressure. 1588// 3. Use of a dominator tree could cause large compile-time regression. 1589// This is because we recompute the DT on every change in the main CGP 1590// run-loop. The recomputing is probably unnecessary in many cases, so if 1591// that was fixed, using a DT here would be ok. 1593// There is one important particular case we still want to handle: if BO is 1594// the IV increment. Important properties that make it profitable: 1595// - We can speculate IV increment anywhere in the loop (as long as the 1596// indvar Phi is its only user); 1597// - Upon computing Cmp, we effectively compute something equivalent to the 1598// IV increment (despite it loops differently in the IR). So moving it up 1599// to the cmp point does not really increase register pressure. 1603// We allow matching the canonical IR (add X, C) back to (usubo X, -C). 1604if (BO->
getOpcode() == Instruction::Add &&
1605 IID == Intrinsic::usub_with_overflow) {
1606assert(isa<Constant>(Arg1) &&
"Unexpected input for usubo");
1610// Insert at the first instruction of the pair. 1613// If BO is an XOR, it is not guaranteed that it comes after both inputs to 1614// the overflow intrinsic are defined. 1615if ((BO->
getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1620assert(InsertPt !=
nullptr &&
"Parent block did not contain cmp or binop");
1623Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1624if (BO->
getOpcode() != Instruction::Xor) {
1625Value *Math = Builder.CreateExtractValue(MathOV, 0,
"math");
1629"Patterns with XOr should use the BO only in the compare");
1630Value *OV = Builder.CreateExtractValue(MathOV, 1,
"ov");
1632Cmp->eraseFromParent();
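  // Net effect, sketched on hand-written IR (not from a test):
  //   %s  = add i64 %a, %b
  //   %ov = icmp ult i64 %s, %a       ; canonical unsigned-overflow check
  // becomes a single @llvm.uadd.with.overflow.i64 call, with %s and %ov
  // replaced by extractvalue of its two results.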
1637/// Match special-case patterns that check for unsigned add overflow. 1640// Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) 1641// Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) 1642Value *
A = Cmp->getOperand(0), *
B = Cmp->getOperand(1);
1644// We are not expecting non-canonical/degenerate code. Just bail out. 1645if (isa<Constant>(
A))
1650B = ConstantInt::get(
B->getType(), 1);
1656// Check the users of the variable operand of the compare looking for an add 1657// with the adjusted constant. 1658for (
User *U :
A->users()) {
1660Add = cast<BinaryOperator>(U);
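// Hand-written illustration of the special cases matched above (not taken
// from a test):
//   %c  = icmp eq i32 %a, -1     ; overflows iff %a is the max value
//   %a1 = add i32 %a, 1
// becomes
//   %m  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 1)
//   %a1 = extractvalue { i32, i1 } %m, 0
//   %c  = extractvalue { i32, i1 } %m, 1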
1667/// Try to combine the compare into a call to the llvm.uadd.with.overflow 1668/// intrinsic. Return true if any changes were made. 1669bool CodeGenPrepare::combineToUAddWithOverflow(
CmpInst *Cmp,
1670 ModifyDT &ModifiedDT) {
1671bool EdgeCase =
false;
1677// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases. 1678A =
Add->getOperand(0);
1679B =
Add->getOperand(1);
1685Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1688// We don't want to move around uses of condition values this late, so we 1689// check if it is legal to create the call to the intrinsic in the basic 1690// block containing the icmp. 1691if (
Add->getParent() !=
Cmp->getParent() && !
Add->hasOneUse())
1694if (!replaceMathCmpWithIntrinsic(
Add,
A,
B, Cmp,
1695 Intrinsic::uadd_with_overflow))
1698// Reset callers - do not crash by iterating over a dead instruction. 1699 ModifiedDT = ModifyDT::ModifyInstDT;
1703bool CodeGenPrepare::combineToUSubWithOverflow(
CmpInst *Cmp,
1704 ModifyDT &ModifiedDT) {
1705// We are not expecting non-canonical/degenerate code. Just bail out. 1707if (isa<Constant>(
A) && isa<Constant>(
B))
1710// Convert (A u> B) to (A u< B) to simplify pattern matching. 1716// Convert special-case: (A == 0) is the same as (A u< 1). 1718B = ConstantInt::get(
B->getType(), 1);
1721// Convert special-case: (A != 0) is the same as (0 u< A). 1729// Walk the users of a variable operand of a compare looking for a subtract or 1730// add with that same operand. Also match the 2nd operand of the compare to 1731// the add/sub, but that may be a negated constant operand of an add. 1732Value *CmpVariableOperand = isa<Constant>(
A) ?
B :
A;
1734for (
User *U : CmpVariableOperand->
users()) {
1735// A - B, A u< B --> usubo(A, B) 1737 Sub = cast<BinaryOperator>(U);
1741// A + (-C), A u< C (canonicalized form of (sub A, C)) 1742constAPInt *CmpC, *AddC;
1745 Sub = cast<BinaryOperator>(U);
1758 Cmp, Intrinsic::usub_with_overflow))
1761// Reset callers - do not crash by iterating over a dead instruction. 1762 ModifiedDT = ModifyDT::ModifyInstDT;
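  // Hand-written illustration of the two patterns handled above (not from a
  // test): both
  //   %d = sub i32 %a, %b              %d = add i32 %a, -42
  //   %c = icmp ult i32 %a, %b         %c = icmp ult i32 %a, 42
  // can be rewritten into one @llvm.usub.with.overflow.i32 call whose two
  // results replace %d and %c.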
1766/// Sink the given CmpInst into user blocks to reduce the number of virtual 1767/// registers that must be created and coalesced. This is a clear win except on 1768/// targets with multiple condition code registers (PowerPC), where it might 1769/// lose; some adjustment may be wanted there. 1771/// Return true if any changes are made. 1776// Avoid sinking soft-FP comparisons, since this can move them into a loop. 1780// Only insert a cmp in each block once. 1783bool MadeChange =
false;
1786Use &TheUse = UI.getUse();
1789// Preincrement use iterator so we don't invalidate it. 1792// Don't bother for PHI nodes. 1793if (isa<PHINode>(
User))
1796// Figure out which BB this cmp is used in. 1800// If this user is in the same block as the cmp, don't change the cmp. 1804// If we have already inserted a cmp into this block, use it. 1805CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1811 Cmp->getOperand(0), Cmp->getOperand(1),
"");
1813// Propagate the debug info. 1817// Replace a use of the cmp with a use of the new cmp. 1818 TheUse = InsertedCmp;
1823// If we removed all uses, nuke the cmp. 1824if (Cmp->use_empty()) {
1825 Cmp->eraseFromParent();
/// For a pattern like:
///   DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
///   ...
///   br DomCond, TrueBB, CmpBB
/// CmpBB: (with DomBB being the single predecessor)
///   Cmp = icmp eq CmpOp0, CmpOp1
///
/// it would take two comparisons on targets where the lowering of icmp sgt/slt
/// differs from the lowering of icmp eq (PowerPC). This function tries to
/// convert 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0,
/// CmpOp1'. After that, DomCond and Cmp can use the same comparison, saving
/// one compare.
///
/// Return true if any changes are made.
  // If icmp eq has users other than BranchInst and SelectInst, converting it
  // to icmp slt/sgt would introduce more redundant LLVM IR.
  for (User *U : Cmp->users()) {
1863if (isa<BranchInst>(U))
1865if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1870// This is a cheap/incomplete check for dominance - just match a single 1871// predecessor with a conditional branch. 1877// We want to ensure that the only way control gets to the comparison of 1878// interest is that a less/greater than comparison on the same operands is 1884if (CmpBB != FalseBB)
1887Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1894// Convert the equality comparison to the opposite of the dominating 1895// comparison and swap the direction for all branch/select users. 1896// We have conceptually converted: 1897// Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; 1899// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; 1900// And similarly for branches. 1901for (
User *U : Cmp->users()) {
1902if (
auto *BI = dyn_cast<BranchInst>(U)) {
1907if (
auto *SI = dyn_cast<SelectInst>(U)) {
1910 SI->swapProfMetadata();
1919/// Many architectures use the same instruction for both subtract and cmp. Try 1920/// to swap cmp operands to match subtract operations to allow for CSE. 1922Value *Op0 = Cmp->getOperand(0);
1923Value *Op1 = Cmp->getOperand(1);
1925 isa<Constant>(Op1) || Op0 == Op1)
1928// If a subtract already has the same operands as a compare, swapping would be 1929// bad. If a subtract has the same operands as a compare but in reverse order, 1930// then swapping is good. 1932unsigned NumInspected = 0;
1934// Avoid walking many users. 1935if (++NumInspected > 128)
1943if (GoodToSwap > 0) {
1944 Cmp->swapOperands();
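  // For example (illustrative only): with "%s = sub i32 %x, %y" elsewhere in
  // the function, "icmp ult i32 %y, %x" is swapped to "icmp ugt i32 %x, %y" so
  // that the compare and the subtract use their operands in the same order and
  // can be CSE'd on targets where sub and cmp share an instruction.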
1952FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1956// Don't fold if the target offers free fabs and the predicate is legal. 1963// Reverse the canonicalization if it is a FP class test 1964auto ShouldReverseTransform = [](
FPClassTest ClassTest) {
1967auto [ClassVal, ClassTest] =
1973if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1978 Cmp->replaceAllUsesWith(IsFPClass);
1986Value *Incr, *RemAmt;
1987// NB: If RemAmt is a power of 2 it *should* have been transformed by now. 1991Value *AddInst, *AddOffset;
1992// Find out loop increment PHI. 1993auto *PN = dyn_cast<PHINode>(Incr);
1998// Search through a NUW add on top of the loop increment. 2004 PN = dyn_cast<PHINode>(V0);
2008 PN = dyn_cast<PHINode>(V1);
2016// This isn't strictly necessary, what we really need is one increment and any 2017// amount of initial values all being the same. 2018if (PN->getNumIncomingValues() != 2)
2021// Only trivially analyzable loops. 2023if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2026// Req that the remainder is in the loop 2027if (!L->contains(Rem))
  // Only works if the remainder amount is a loop invariant.
  if (!L->isLoopInvariant(RemAmt))
2034// Is the PHI a loop increment? 2039// We need remainder_amount % increment_amount to be zero. Increment of one 2040// satisfies that without any special logic and is overwhelmingly the common 2045// Need the increment to not overflow. 2049// Set output variables. 2052 AddInstOut = AddInst;
2053 AddOffsetOut = AddOffset;
2060// for(i = Start; i < End; ++i) 2061// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant; 2065// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant; 2066// for(i = Start; i < End; ++i, ++rem) 2067// Rem = rem == RemAmtLoopInvariant ? 0 : Rem; 2072Value *AddOffset, *RemAmt, *AddInst;
2075 AddOffset, LoopIncrPN))
2078// Only non-constant remainder as the extra IV is probably not profitable 2081// Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If 2082// we can rule out register pressure and ensure this `urem` is executed each 2083// iteration, its probably profitable to handle the const case as well. 2085// Potential TODO(2): Should we have a check for how "nested" this remainder 2086// operation is? The new code runs every iteration so if the remainder is 2087// guarded behind unlikely conditions this might not be worth it. 2093// If we have add create initial value for remainder. 2094// The logic here is: 2095// (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant 2097// Only proceed if the expression simplifies (otherwise we can't fully 2098// optimize out the urem). 2100assert(AddOffset &&
"We found an add but missing values");
2101// Without dom-condition/assumption cache we aren't likely to get much out 2102// of a context instruction. 2105/*IsNUW=*/true, *
DL);
2110// If we can't fully optimize out the `rem`, skip this transform. 2115// Create new remainder with induction variable. 2124// `(add (urem x, y), 1)` is always nuw. 2130 NewRem->
addIncoming(Start, L->getLoopPreheader());
2133// Insert all touched BBs. 2135 FreshBBs.
insert(L->getLoopLatch());
2142 cast<Instruction>(AddInst)->eraseFromParent();
2146bool CodeGenPrepare::optimizeURem(
Instruction *Rem) {
2152/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. 2153/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the 2154/// result cannot be zero. 2163auto *
II = cast<IntrinsicInst>(Cmp->getOperand(0));
2167 Cmp->setOperand(1, ConstantInt::get(
II->getType(), 2));
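// For example (hand-written IR, assuming the ctpop operand is known non-zero):
//   %p = call i32 @llvm.ctpop.i32(i32 %x)
//   %c = icmp eq i32 %p, 1
// is rewritten to
//   %c = icmp ult i32 %p, 2
// which some targets lower more cheaply than the equality test.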
2177bool CodeGenPrepare::optimizeCmp(
CmpInst *Cmp, ModifyDT &ModifiedDT) {
2181if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2184if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2202/// Duplicate and sink the given 'and' instruction into user blocks where it is 2203/// used in a compare to allow isel to generate better code for targets where 2204/// this operation can be combined. 2206/// Return true if any changes are made. 2208 SetOfInstrs &InsertedInsts) {
2209// Double-check that we're not trying to optimize an instruction that was 2210// already optimized by some other part of this pass. 2211assert(!InsertedInsts.count(AndI) &&
2212"Attempting to optimize already optimized and instruction");
2213 (void)InsertedInsts;
2215// Nothing to do for single use in same basic block. 2220// Try to avoid cases where sinking/duplicating is likely to increase register 2227for (
auto *U : AndI->
users()) {
2230// Only sink 'and' feeding icmp with 0. 2231if (!isa<ICmpInst>(
User))
2235if (!CmpC || !CmpC->
isZero())
2245// Push the 'and' into the same block as the icmp 0. There should only be 2246// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any 2247// others, so we don't need to keep track of which BBs we insert into. 2250Use &TheUse = UI.getUse();
2253// Preincrement use iterator so we don't invalidate it. 2258// Keep the 'and' in the same place if the use is already in the same block. 2264// Propagate the debug info. 2267// Replace a use of the 'and' with a use of the new 'and'. 2268 TheUse = InsertedAnd;
2273// We removed all uses, nuke the and. 2278/// Check if the candidates could be combined with a shift instruction, which 2280/// 1. Truncate instruction 2281/// 2. And instruction and the imm is a mask of the low bits: 2282/// imm & (imm+1) == 0 2284if (!isa<TruncInst>(
User)) {
2285if (
User->getOpcode() != Instruction::And ||
2291if ((Cimm & (Cimm + 1)).getBoolValue())
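// Worked example of the low-bit-mask check above (values chosen for
// illustration): Cimm = 0x0F gives 0x0F & 0x10 == 0, so it is a mask of the
// low four bits and the 'and' can combine with a shift into a bit-extract;
// Cimm = 0x0E gives 0x0E & 0x0F == 0x0E != 0, so it is rejected.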
2297/// Sink both shift and truncate instruction to the use of truncate's BB. 2304auto *TruncI = cast<TruncInst>(
User);
2305bool MadeChange =
false;
2308 TruncE = TruncI->user_end();
2309 TruncUI != TruncE;) {
2311Use &TruncTheUse = TruncUI.getUse();
2312Instruction *TruncUser = cast<Instruction>(*TruncUI);
2313// Preincrement use iterator so we don't invalidate it. 2321// If the use is actually a legal node, there will not be an 2322// implicit truncate. 2323// FIXME: always querying the result type is just an 2324// approximation; some nodes' legality is determined by the 2325// operand or other means. There's no good way to find out though. 2330// Don't bother for PHI nodes. 2331if (isa<PHINode>(TruncUser))
2336if (UserBB == TruncUserBB)
2340CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2342if (!InsertedShift && !InsertedTrunc) {
2346if (ShiftI->
getOpcode() == Instruction::AShr)
2348 BinaryOperator::CreateAShr(ShiftI->
getOperand(0), CI,
"");
2351 BinaryOperator::CreateLShr(ShiftI->
getOperand(0), CI,
"");
2358// It will go ahead of any debug-info. 2359 TruncInsertPt.setHeadBit(
true);
2360assert(TruncInsertPt != TruncUserBB->
end());
2364 InsertedTrunc->
insertBefore(*TruncUserBB, TruncInsertPt);
2365 InsertedTrunc->
setDebugLoc(TruncI->getDebugLoc());
2369 TruncTheUse = InsertedTrunc;
2375/// Sink the shift *right* instruction into user blocks if the uses could 2376/// potentially be combined with this shift instruction and generate BitExtract 2377/// instruction. It will only be applied if the architecture supports BitExtract 2378/// instruction. Here is an example: 2380/// %x.extract.shift = lshr i64 %arg1, 32 2382/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 2386/// %x.extract.shift.1 = lshr i64 %arg1, 32 2387/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 2389/// CodeGen will recognize the pattern in BB2 and generate BitExtract 2391/// Return true if any changes are made. 2397 /// Only insert instructions in each block once. 2402bool MadeChange =
false;
2405Use &TheUse = UI.getUse();
2407// Preincrement use iterator so we don't invalidate it. 2410// Don't bother for PHI nodes. 2411if (isa<PHINode>(
User))
2419if (UserBB == DefBB) {
2420// If the shift and truncate instruction are in the same BB. The use of 2421// the truncate(TruncUse) may still introduce another truncate if not 2422// legal. In this case, we would like to sink both shift and truncate 2423// instruction to the BB of TruncUse. 2426// i64 shift.result = lshr i64 opnd, imm 2427// trunc.result = trunc shift.result to i16 2430// ----> We will have an implicit truncate here if the architecture does 2431// not have i16 compare. 2432// cmp i16 trunc.result, opnd2 2434if (isa<TruncInst>(
User) &&
2436// If the type of the truncate is legal, no truncate will be 2437// introduced in other basic blocks. 2444// If we have already inserted a shift into this block, use it. 2447if (!InsertedShift) {
2451if (ShiftI->
getOpcode() == Instruction::AShr)
2453 BinaryOperator::CreateAShr(ShiftI->
getOperand(0), CI,
"");
2456 BinaryOperator::CreateLShr(ShiftI->
getOperand(0), CI,
"");
2463// Replace a use of the shift with a use of the new shift. 2464 TheUse = InsertedShift;
2467// If we removed all uses, or there are none, nuke the shift. 2477/// If counting leading or trailing zeros is an expensive operation and a zero 2478/// input is defined, add a check for zero to avoid calling the intrinsic. 2480/// We want to transform: 2481/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) 2485/// %cmpz = icmp eq i64 %A, 0 2486/// br i1 %cmpz, label %cond.end, label %cond.false 2488/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) 2489/// br label %cond.end 2491/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] 2493/// If the transform is performed, return true and set ModifiedDT to true. 2500// If a zero input is undefined, it doesn't make sense to despeculate that. 2504// If it's cheap to speculate, there's nothing to do. 2511// Only handle legal scalar cases. Anything else requires too much work. 2513if (Ty->
isVectorTy() || SizeInBits >
DL->getLargestLegalIntTypeSizeInBits())
2516// Bail if the value is never zero. 2521// The intrinsic will be sunk behind a compare against zero and branch. 2525 FreshBBs.
insert(CallBlock);
2527// Create another block after the count zero intrinsic. A PHI will be added 2528// in this block to select the result of the intrinsic or the bit-width 2529// constant if the input to the intrinsic is zero. 2531// Any debug-info after CountZeros should not be included. 2532 SplitPt.setHeadBit(
true);
2535 FreshBBs.
insert(EndBlock);
2537// Update the LoopInfo. The new blocks are in the same loop as the start 2540 L->addBasicBlockToLoop(CallBlock, LI);
2541 L->addBasicBlockToLoop(EndBlock, LI);
2544// Set up a builder to create a compare, conditional branch, and PHI. 2549// Replace the unconditional branch that was created by the first split with 2550// a compare against zero and a conditional branch. 2552// Avoid introducing branch on poison. This also replaces the ctz operand. 2559// Create a PHI in the end block to select either the output of the intrinsic 2560// or the bit width of the operand. 2568// We are explicitly handling the zero case, so we can set the intrinsic's 2569// undefined zero argument to 'true'. This will also prevent reprocessing the 2570// intrinsic; we only despeculate when a zero input is defined. 2572 ModifiedDT = ModifyDT::ModifyBBDT;
2576bool CodeGenPrepare::optimizeCallInst(
CallInst *CI, ModifyDT &ModifiedDT) {
  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
2586// Avoid processing instructions out of order, which could cause 2587// reuse before a value is defined. 2591// Sink address computing for memory operands into the block. 2592if (optimizeInlineAsmInst(CI))
2596// Align the pointer arguments to this call if the target thinks it's a good 2601for (
auto &Arg : CI->
args()) {
2602// We want to align both objects whose address is used directly and 2603// objects whose address is used in casts and GEPs, though it only makes 2604// sense for GEPs if the offset is a multiple of the desired alignment and 2605// if size - offset meets the size threshold. 2606if (!Arg->getType()->isPointerTy())
2609 cast<PointerType>(Arg->getType())->getAddressSpace()),
2616if ((AI = dyn_cast<AllocaInst>(Val)) && AI->
getAlign() < PrefAlign &&
2619// Global variables can only be aligned if they are defined in this 2620// object (i.e. they are uniquely initialized in this object), and 2621// over-aligning global variables that have an explicit section is 2630// If this is a memcpy (or similar) then we may be able to improve the 2635if (!MIDestAlign || DestAlign > *MIDestAlign)
2636MI->setDestAlignment(DestAlign);
2638MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2640if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2641 MTI->setSourceAlignment(SrcAlign);
2645// If we have a cold call site, try to sink addressing computation into the 2646// cold block. This interacts with our handling for loads and stores to 2647// ensure that we can fold all uses of a potential addressing computation 2648// into their uses. TODO: generalize this to work over profiling data 2651for (
auto &Arg : CI->
args()) {
2652if (!Arg->getType()->isPointerTy())
2654unsigned AS = Arg->getType()->getPointerAddressSpace();
2655if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2661switch (
II->getIntrinsicID()) {
2664case Intrinsic::assume:
2666case Intrinsic::allow_runtime_check:
2667case Intrinsic::allow_ubsan_check:
2668case Intrinsic::experimental_widenable_condition: {
2669// Give up on future widening opportunities so that we can fold away dead 2670// paths and merge blocks before going into block-local instruction 2672if (
II->use_empty()) {
2673II->eraseFromParent();
2677 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2682case Intrinsic::objectsize:
2684case Intrinsic::is_constant:
2686case Intrinsic::aarch64_stlxr:
2687case Intrinsic::aarch64_stxr: {
2692// Sink a zext feeding stlxr/stxr before it, so it can be folded into it. 2694// Mark this instruction as "inserted by CGP", so that other 2695// optimizations don't touch it. 2696 InsertedInsts.insert(ExtVal);
2700case Intrinsic::launder_invariant_group:
2701case Intrinsic::strip_invariant_group: {
2702Value *ArgVal =
II->getArgOperand(0);
2703auto it = LargeOffsetGEPMap.
find(
II);
2704if (it != LargeOffsetGEPMap.
end()) {
2705// Merge entries in LargeOffsetGEPMap to reflect the RAUW. 2706// Make sure not to have to deal with iterator invalidation 2707// after possibly adding ArgVal to LargeOffsetGEPMap. 2708auto GEPs = std::move(it->second);
2709 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2714II->eraseFromParent();
2717case Intrinsic::cttz:
2718case Intrinsic::ctlz:
2719// If counting zeros is expensive, try to avoid it. 2722case Intrinsic::fshl:
2723case Intrinsic::fshr:
2724return optimizeFunnelShift(
II);
2725case Intrinsic::dbg_assign:
2726case Intrinsic::dbg_value:
2727return fixupDbgValue(
II);
2728case Intrinsic::masked_gather:
2729return optimizeGatherScatterInst(
II,
II->getArgOperand(0));
2730case Intrinsic::masked_scatter:
2731return optimizeGatherScatterInst(
II,
II->getArgOperand(1));
2737while (!PtrOps.
empty()) {
2740if (optimizeMemoryInst(
II, PtrVal, AccessTy, AS))
2745// From here on out we're working with named functions. 2750// Lower all default uses of _chk calls. This is very similar 2751// to what InstCombineCalls does, but here we are only lowering calls 2752// to fortified library functions (e.g. __memcpy_chk) that have the default 2753// "don't know" as the objectsize. Anything else should be left alone. 2756if (
Value *V = Simplifier.optimizeCall(CI, Builder)) {
  // SCCP may have propagated, among other things, C++ static variables across
  // calls. If this happens to be the case, we may want to undo it in order to
  // avoid redundant pointer computation of the constant, as the function method
  // returning the constant needs to be executed anyway.
  if (!F->getReturnType()->isPointerTy())
    return false;

  for (auto &BB : *F) {
    if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
      if (!UniformValue)
        UniformValue = V;
      else if (V != UniformValue)
        return false;
  if (Callee->hasExactDefinition()) {
    bool MadeChange = false;
    auto *I = dyn_cast<Instruction>(U.getUser());
    // Limit to the same basic block to avoid extending the call-site live
    // range, which otherwise could increase register pressure.

  if (const auto *II = dyn_cast<IntrinsicInst>(CI))
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      return true;

  if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
    switch (LF) {
    case LibFunc_strncpy:
    case LibFunc_strncat:
      return true;
/// Look for opportunities to duplicate return instructions to the predecessor
/// to enable tail call optimizations. The case it is currently looking for is
/// the following one. Known intrinsics or library functions that may be tail
/// called are taken into account as well.
///
///   bb0:
///     %tmp0 = tail call i32 @f0()
///     br label %return
///   bb1:
///     %tmp1 = tail call i32 @f1()
///     br label %return
///   bb2:
///     %tmp2 = tail call i32 @f2()
///     br label %return
///   return:
///     %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///     ret i32 %retval
///
/// =>
///
///   bb0:
///     %tmp0 = tail call i32 @f0()
///     ret i32 %tmp0
///   bb1:
///     %tmp1 = tail call i32 @f1()
///     ret i32 %tmp1
///   bb2:
///     %tmp2 = tail call i32 @f2()
///     ret i32 %tmp2
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
                                                ModifyDT &ModifiedDT) {
  assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");

  BCI = dyn_cast<BitCastInst>(V);
  EVI = dyn_cast<ExtractValueInst>(V);
  PN = dyn_cast<PHINode>(V);

  auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
    const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
    return II->getIntrinsicID() == Intrinsic::lifetime_end;
  };
  auto isFakeUse = [&FakeUses](const Instruction *Inst) {
    if (auto *II = dyn_cast<IntrinsicInst>(Inst);
        II && II->getIntrinsicID() == Intrinsic::fake_use) {
      // Record the instruction so it can be preserved when the exit block is
      // removed. Do not preserve the fake use that uses the result of the
      // call.
      // Do not copy fake uses that use the result of a PHI node.
      // FIXME: If we do want to copy the fake use into the return blocks, we
      // have to figure out which of the PHI node operands to use for each
      // copy.
      if (!isa<PHINode>(II->getOperand(0))) {

  // Make sure there are no instructions between the first instruction
  // and the return.
  // Skip over debug and the bitcast.
  while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(&*BI) ||
  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.

  // Record the call instructions so we can insert any fake uses
  // that need to be preserved before them.

  // Look through bitcasts.
  CallInst *CI = dyn_cast<CallInst>(IncomingVal);

  // Make sure the phi value is indeed produced by the tail call.

  // Consider the cases in which the phi value is indirectly produced by
  // the tail call, for example when encountering memset(), memmove(),
  // strcpy(), whose return value may have been optimized out. In such
  // cases, the value needs to be the first function argument.
  //
  //   tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
  //   %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
  CI = dyn_cast_or_null<CallInst>(

      if (!VisitedBBs.insert(Pred).second)
        continue;
      if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {

      // Either we return void or the return value must be the first
      // argument of a known intrinsic or library function.
      if (!V || isa<UndefValue>(V) ||
  for (const auto &TailCallBB : TailCallBBs) {
    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());

    // Duplicate the return into TailCallBB.
    BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
    BFI->setBlockFreq(BB,
                      (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
    ModifiedDT = ModifyDT::ModifyBBDT;

  // If we eliminated all predecessors of the block, delete the block now.

  // Copy the fake uses found in the original return block to all blocks
  // that contain tail calls.
  for (auto *CI : CallInsts) {
    for (const auto *FakeUse : FakeUses) {
      auto *ClonedInst = FakeUse->clone();
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

namespace {

/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
  Value *OriginalValue = nullptr;

  enum FieldName {
    NoField = 0x00,
    BaseRegField = 0x01,
    BaseGVField = 0x02,
    BaseOffsField = 0x04,
    ScaledRegField = 0x08,
    ScaleField = 0x10,
    MultipleFields = 0xff
  };
    // First check that the types are the same on each field, as differing types
    // is something we can't cope with later on.
    if (BaseReg && other.BaseReg &&
        BaseReg->getType() != other.BaseReg->getType())
      return MultipleFields;
    if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
      return MultipleFields;
    if (ScaledReg && other.ScaledReg &&
        ScaledReg->getType() != other.ScaledReg->getType())
      return MultipleFields;

    // Conservatively reject 'inbounds' mismatches.
    if (InBounds != other.InBounds)
      return MultipleFields;

    // Check each field to see if it differs.
    unsigned Result = NoField;
    if (BaseReg != other.BaseReg)
      Result |= BaseRegField;
    if (BaseGV != other.BaseGV)
      Result |= BaseGVField;
    if (BaseOffs != other.BaseOffs)
      Result |= BaseOffsField;
    if (ScaledReg != other.ScaledReg)
      Result |= ScaledRegField;
    // Don't count 0 as being a different scale, because that actually means
    // unscaled (which will already be counted by having no ScaledReg).
    if (Scale && other.Scale && Scale != other.Scale)
      Result |= ScaleField;

    if (llvm::popcount(Result) > 1)
      return MultipleFields;
    return static_cast<FieldName>(Result);
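  // Illustrative note (not part of the upstream code): compare() reports the
  // single field in which two ExtAddrModes differ, or MultipleFields when
  // they diverge in more than one way. Roughly, assuming two modes built
  // from hypothetical values %a and %b:
  //
  //   ExtAddrMode A, B;          // both empty
  //   A.BaseReg = %a;            // A = { BaseReg: %a }
  //   B.BaseReg = %b;            // B = { BaseReg: %b }
  //   A.compare(B) == BaseRegField
  //
  //   B.BaseOffs = 16;           // now B also differs in the offset
  //   A.compare(B) == MultipleFields
  //
  // AddressingModeCombiner below only merges modes whose difference collapses
  // to a single such field.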
  // An AddrMode is trivial if it involves no calculation i.e. it is just a base
  // value.
  //
  // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
  // trivial if at most one of these terms is nonzero, except that BaseGV and
  // BaseReg both being zero actually means a null pointer value, which we
  // consider to be 'non-zero' here.

      return ConstantInt::get(IntPtrTy, BaseOffs);

  void SetCombinedField(FieldName Field, Value *V,
                        const SmallVectorImpl<ExtAddrMode> &AddrModes) {
    switch (Field) {
    case ExtAddrMode::BaseRegField:
    case ExtAddrMode::BaseGVField:
      // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
      // in the BaseReg field.
    case ExtAddrMode::ScaledRegField:
      // If we have a mix of scaled and unscaled addrmodes then we want scale
      // to be the scale and not zero.
    case ExtAddrMode::BaseOffsField:
      // The offset is no longer a constant, so it goes in ScaledReg with a
      // scale of one.

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    bool NeedPlus = false;
      BaseGV->printAsOperand(OS, /*PrintType=*/false);
      OS << (NeedPlus ? " + " : "") << BaseOffs;
      OS << (NeedPlus ? " + " : "") << "Base:";
      BaseReg->printAsOperand(OS, /*PrintType=*/false);
      OS << (NeedPlus ? " + " : "") << Scale << "*";
}
} // end anonymous namespace

namespace {

/// This class provides transaction based operation on the IR.
/// Every change made through this class is recorded in the internal state and
/// can be undone (rollback) until commit is called.
/// CGP does not check if instructions could be speculatively executed when
/// moved. Preserving the original location would pessimize the debugging
/// experience, as well as negatively impact the quality of sample PGO.
class TypePromotionTransaction {
  /// This represents the common interface of the individual transaction.
  /// Each class implements the logic for doing one specific modification on
  /// the IR via the TypePromotionTransaction.
  class TypePromotionAction {
  protected:
    /// The Instruction modified.
    Instruction *Inst;

  public:
    /// Constructor of the action.
    /// The constructor performs the related action on the IR.
    TypePromotionAction(Instruction *Inst) : Inst(Inst) {}

    virtual ~TypePromotionAction() = default;

    /// Undo the modification done by this action.
    /// When this method is called, the IR must be in the same state as it was
    /// before this action was applied.
    /// \pre Undoing the action works if and only if the IR is in the exact same
    /// state as it was directly after this action was applied.
    virtual void undo() = 0;

    /// Advocate every change made by this action.
    /// When the results on the IR of the action are to be kept, it is important
    /// to call this function, otherwise hidden information may be kept forever.
    virtual void commit() {
      // Nothing to be done, this action is not doing anything.
    }
  };

  /// Utility to remember the position of an instruction.
  class InsertionHandler {
3264 /// Position of an instruction. 3265 /// Either an instruction: 3266 /// - Is the first in a basic block: BB is used. 3267 /// - Has a previous instruction: PrevInst is used. 3272 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3274 /// Remember whether or not the instruction had a previous instruction. 3275bool HasPrevInstruction;
3278 /// Record the position of \p Inst. 3280 HasPrevInstruction = (Inst != &*(Inst->
getParent()->begin()));
3283// Record where we would have to re-insert the instruction in the sequence 3284// of DbgRecords, if we ended up reinserting. 3288if (HasPrevInstruction) {
3295 /// Insert \p Inst at the recorded position. 3297if (HasPrevInstruction) {
3309 Inst->
getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3313 /// Move an instruction before another. 3314classInstructionMoveBefore :
public TypePromotionAction {
3315 /// Original position of the instruction. 3316 InsertionHandler Position;
3319 /// Move \p Inst before \p Before. 3321 : TypePromotionAction(Inst), Position(Inst) {
3327 /// Move the instruction back to its original position. 3328void undo()
override{
3330 Position.insert(Inst);
3334 /// Set the operand of an instruction with a new value. 3335classOperandSetter :
public TypePromotionAction {
3336 /// Original operand of the instruction. 3339 /// Index of the modified instruction. 3343 /// Set \p Idx operand of \p Inst with \p NewVal. 3345 : TypePromotionAction(Inst),
Idx(
Idx) {
3347 <<
"for:" << *Inst <<
"\n" 3348 <<
"with:" << *NewVal <<
"\n");
3353 /// Restore the original value of the instruction. 3354void undo()
override{
3356 <<
"for: " << *Inst <<
"\n" 3357 <<
"with: " << *Origin <<
"\n");
3362 /// Hide the operands of an instruction. 3363 /// Do as if this instruction was not using any of its operands. 3364classOperandsHider :
public TypePromotionAction {
3365 /// The list of original operands. 3369 /// Remove \p Inst from the uses of the operands of \p Inst. 3370 OperandsHider(
Instruction *Inst) : TypePromotionAction(Inst) {
3373 OriginalValues.
reserve(NumOpnds);
3374for (
unsigned It = 0; It < NumOpnds; ++It) {
3375// Save the current operand. 3379// We could use OperandSetter here, but that would imply an overhead 3380// that we are not willing to pay. 3385 /// Restore the original list of uses. 3386void undo()
override{
3388for (
unsigned It = 0, EndIt = OriginalValues.
size(); It != EndIt; ++It)
3393 /// Build a truncate instruction. 3394classTruncBuilder :
public TypePromotionAction {
3398 /// Build a truncate instruction of \p Opnd producing a \p Ty 3400 /// trunc Opnd to Ty. 3401 TruncBuilder(
Instruction *Opnd,
Type *Ty) : TypePromotionAction(Opnd) {
3403 Builder.SetCurrentDebugLocation(
DebugLoc());
3404 Val = Builder.CreateTrunc(Opnd, Ty,
"promoted");
3408 /// Get the built value. 3409Value *getBuiltValue() {
return Val; }
3411 /// Remove the built instruction. 3412void undo()
override{
3414if (
Instruction *IVal = dyn_cast<Instruction>(Val))
3415 IVal->eraseFromParent();
3419 /// Build a sign extension instruction. 3420classSExtBuilder :
public TypePromotionAction {
3424 /// Build a sign extension instruction of \p Opnd producing a \p Ty 3426 /// sext Opnd to Ty. 3428 : TypePromotionAction(InsertPt) {
3430 Val = Builder.CreateSExt(Opnd, Ty,
"promoted");
3434 /// Get the built value. 3435Value *getBuiltValue() {
return Val; }
3437 /// Remove the built instruction. 3438void undo()
override{
3440if (
Instruction *IVal = dyn_cast<Instruction>(Val))
3441 IVal->eraseFromParent();
3445 /// Build a zero extension instruction. 3446classZExtBuilder :
public TypePromotionAction {
3450 /// Build a zero extension instruction of \p Opnd producing a \p Ty 3452 /// zext Opnd to Ty. 3454 : TypePromotionAction(InsertPt) {
3456 Builder.SetCurrentDebugLocation(
DebugLoc());
3457 Val = Builder.CreateZExt(Opnd, Ty,
"promoted");
3461 /// Get the built value. 3462Value *getBuiltValue() {
return Val; }
3464 /// Remove the built instruction. 3465void undo()
override{
3467if (
Instruction *IVal = dyn_cast<Instruction>(Val))
3468 IVal->eraseFromParent();
3472 /// Mutate an instruction to another type. 3473classTypeMutator :
public TypePromotionAction {
3474 /// Record the original type. 3478 /// Mutate the type of \p Inst into \p NewTy. 3480 : TypePromotionAction(Inst), OrigTy(Inst->
getType()) {
3481LLVM_DEBUG(
dbgs() <<
"Do: MutateType: " << *Inst <<
" with " << *NewTy
3486 /// Mutate the instruction back to its original type. 3487void undo()
override{
3488LLVM_DEBUG(
dbgs() <<
"Undo: MutateType: " << *Inst <<
" with " << *OrigTy
3494 /// Replace the uses of an instruction by another instruction. 3495classUsesReplacer :
public TypePromotionAction {
3496 /// Helper structure to keep track of the replaced uses. 3497structInstructionAndIdx {
3498 /// The instruction using the instruction. 3501 /// The index where this instruction is used for Inst. 3508 /// Keep track of the original uses (pair Instruction, Index). 3510 /// Keep track of the debug users. 3512 /// And non-instruction debug-users too. 3515 /// Keep track of the new value so that we can undo it by replacing 3516 /// instances of the new value with the original value. 3522 /// Replace all the use of \p Inst by \p New. 3524 : TypePromotionAction(Inst),
New(
New) {
3525LLVM_DEBUG(
dbgs() <<
"Do: UsersReplacer: " << *Inst <<
" with " << *New
3527// Record the original uses. 3530 OriginalUses.
push_back(InstructionAndIdx(UserI,
U.getOperandNo()));
3532// Record the debug uses separately. They are not in the instruction's 3533// use list, but they are replaced by RAUW. 3536// Now, we can replace the uses. 3540 /// Reassign the original uses of Inst to Inst. 3541void undo()
override{
3543for (InstructionAndIdx &
Use : OriginalUses)
3544Use.Inst->setOperand(
Use.Idx, Inst);
3545// RAUW has replaced all original uses with references to the new value, 3546// including the debug uses. Since we are undoing the replacements, 3547// the original debug uses must also be reinstated to maintain the 3548// correctness and utility of debug value instructions. 3549for (
auto *DVI : DbgValues)
3550 DVI->replaceVariableLocationOp(New, Inst);
3551// Similar story with DbgVariableRecords, the non-instruction 3552// representation of dbg.values. 3554 DVR->replaceVariableLocationOp(New, Inst);
3558 /// Remove an instruction from the IR. 3559classInstructionRemover :
public TypePromotionAction {
3560 /// Original position of the instruction. 3561 InsertionHandler Inserter;
3563 /// Helper structure to hide all the link to the instruction. In other 3564 /// words, this helps to do as if the instruction was removed. 3565 OperandsHider Hider;
3567 /// Keep track of the uses replaced, if any. 3568 UsesReplacer *Replacer =
nullptr;
3570 /// Keep track of instructions removed. 3571 SetOfInstrs &RemovedInsts;
3574 /// Remove all reference of \p Inst and optionally replace all its 3576 /// \p RemovedInsts Keep track of the instructions removed by this Action. 3577 /// \pre If !Inst->use_empty(), then New != nullptr 3578 InstructionRemover(
Instruction *Inst, SetOfInstrs &RemovedInsts,
3580 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3581 RemovedInsts(RemovedInsts) {
3583 Replacer =
new UsesReplacer(Inst, New);
3585 RemovedInsts.insert(Inst);
3586 /// The instructions removed here will be freed after completing 3587 /// optimizeBlock() for all blocks as we need to keep track of the 3588 /// removed instructions during promotion. 3592 ~InstructionRemover()
override{
delete Replacer; }
3594 InstructionRemover &operator=(
const InstructionRemover &other) =
delete;
3595 InstructionRemover(
const InstructionRemover &other) =
delete;
3597 /// Resurrect the instruction and reassign it to the proper uses if 3598 /// new value was provided when build this action. 3599void undo()
override{
3600LLVM_DEBUG(
dbgs() <<
"Undo: InstructionRemover: " << *Inst <<
"\n");
3601 Inserter.insert(Inst);
3605 RemovedInsts.erase(Inst);
public:
  /// Restoration point.
  /// The restoration point is a pointer to an action instead of an iterator
  /// because the iterator may be invalidated but not the pointer.
  using ConstRestorationPt = const TypePromotionAction *;

  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
      : RemovedInsts(RemovedInsts) {}

  /// Advocate every change made in that transaction. Return true if any change
  /// happened.
  bool commit();

  /// Undo all the changes made after the given point.
  void rollback(ConstRestorationPt Point);

  /// Get the current restoration point.
  ConstRestorationPt getRestorationPoint() const;

  /// \name API for IR modification with state keeping to support rollback.
  /// @{
  /// Same as Instruction::setOperand.
  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);

  /// Same as Instruction::eraseFromParent.
  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);

  /// Same as Value::replaceAllUsesWith.
  void replaceAllUsesWith(Instruction *Inst, Value *New);

  /// Same as Value::mutateType.
  void mutateType(Instruction *Inst, Type *NewTy);

  /// Same as IRBuilder::createTrunc.
  Value *createTrunc(Instruction *Opnd, Type *Ty);

  /// Same as IRBuilder::createSExt.
  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);

  /// Same as IRBuilder::createZExt.
  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
  /// @}

private:
  /// The ordered list of actions made so far.
  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;

  SetOfInstrs &RemovedInsts;
};
} // end anonymous namespace

void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
                                          Value *NewVal) {
  Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
      Inst, Idx, NewVal));
}

void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                Value *NewVal) {
  Actions.push_back(
      std::make_unique<TypePromotionTransaction::InstructionRemover>(
          Inst, RemovedInsts, NewVal));
}

void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
                                                  Value *New) {
  Actions.push_back(
      std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
}

void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
  Actions.push_back(
      std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}

Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
  Value *Val = Ptr->getBuiltValue();
  Actions.push_back(std::move(Ptr));
  return Val;
}

Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
                                            Type *Ty) {
  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
  Value *Val = Ptr->getBuiltValue();
  Actions.push_back(std::move(Ptr));
  return Val;
}

Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
                                            Type *Ty) {
  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
  Value *Val = Ptr->getBuiltValue();
  Actions.push_back(std::move(Ptr));
  return Val;
}
TypePromotionTransaction::ConstRestorationPt
TypePromotionTransaction::getRestorationPoint() const {
  return !Actions.empty() ? Actions.back().get() : nullptr;
}

bool TypePromotionTransaction::commit() {
  for (std::unique_ptr<TypePromotionAction> &Action : Actions)
    Action->commit();
  bool Modified = !Actions.empty();
  Actions.clear();
  return Modified;
}

void TypePromotionTransaction::rollback(
    TypePromotionTransaction::ConstRestorationPt Point) {
  while (!Actions.empty() && Point != Actions.back().get()) {
    std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
    Curr->undo();
  }
}
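// Illustrative sketch (not upstream code): a typical use of the transaction is
// to take a restoration point, apply speculative rewrites through the
// transaction, and either commit or roll back depending on profitability.
// Names such as InsertPt, Opnd, WideTy, OldExt and ProfitableToKeep below are
// hypothetical:
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   Value *Promoted = TPT.createSExt(InsertPt, Opnd, WideTy);
//   TPT.replaceAllUsesWith(OldExt, Promoted);
//   if (ProfitableToKeep)
//     TPT.commit();                 // keep the recorded actions
//   else
//     TPT.rollback(LastKnownGood);  // undo everything after the point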
/// A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.

  /// This is the addressing mode that we're building up. This is
  /// part of the return value of this addressing mode matching stuff.

  /// The instructions inserted by other CodeGenPrepare optimizations.
  const SetOfInstrs &InsertedInsts;

  /// A map from the instructions to their type before promotion.
  InstrToOrigTy &PromotedInsts;

  /// The ongoing transaction where every action should be registered.
  TypePromotionTransaction &TPT;

  // A GEP which has too large offset to be folded into the addressing mode.
  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;

  /// This is set to true when we should not do profitability checks.
  /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
  bool IgnoreProfitability;

  /// True if we are optimizing for size.
  bool OptSize;

  AddressingModeMatcher(
      const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
      TypePromotionTransaction &TPT,
      : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
        DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
        AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
        InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
        LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
    IgnoreProfitability = false;
  }
  /// Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy. This returns a list of involved
  /// instructions in AddrModeInsts.
  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
  /// optimizations.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  /// \p The ongoing transaction where every action should be registered.
      InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,

    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
                                         AccessTy, AS, MemoryInst, Result,
                                         InsertedInsts, PromotedInsts, TPT,
                                         LargeOffsetGEP, OptSize, PSI, BFI)

  bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
                          bool *MovedAway = nullptr);
  bool isProfitableToFoldIntoAddressingMode(Instruction *I,
  bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
  bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                             Value *PromotedOperand) const;
/// An iterator for PhiNodeSet.
class PhiNodeSetIterator {
  PhiNodeSet *const Set;
  size_t CurrentIndex = 0;

public:
  /// The constructor. Start should point to either a valid element, or be equal
  /// to the size of the underlying SmallVector of the PhiNodeSet.
  PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
  PhiNodeSetIterator &operator++();
/// Keeps a set of PHINodes.
///
/// This is a minimal set implementation for a specific use case:
/// It is very fast when there are very few elements, but also provides good
/// performance when there are many. It is similar to SmallPtrSet, but also
/// provides iteration by insertion order, which is deterministic and stable
/// across runs. It is also similar to SmallSetVector, but provides removing
/// elements in O(1) time. This is achieved by not actually removing the element
/// from the underlying vector, so comes at the cost of using more memory, but
/// that is fine, since PhiNodeSets are used as short lived objects.
class PhiNodeSet {
  friend class PhiNodeSetIterator;

  using iterator = PhiNodeSetIterator;

  /// Keeps the elements in the order of their insertion in the underlying
  /// vector. To achieve constant time removal, it never deletes any element.

  /// Keeps the elements in the underlying set implementation. This (and not the
  /// NodeList defined above) is the source of truth on whether an element
  /// is actually in the collection.

  /// Points to the first valid (not deleted) element when the set is not empty
  /// and the value is not zero. Equals to the size of the underlying vector
  /// when the set is empty. When the value is 0, as in the beginning, the
  /// first element may or may not be valid.
  size_t FirstValidElement = 0;

public:
  /// Inserts a new element to the collection.
  /// \returns true if the element is actually added, i.e. was not in the
  /// collection before the operation.
  bool insert(PHINode *Ptr) {
    if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
      NodeList.push_back(Ptr);
      return true;
    }
    return false;
  }

  /// Removes the element from the collection.
  /// \returns whether the element is actually removed, i.e. was in the
  /// collection before the operation.
  bool erase(PHINode *Ptr) {
    if (NodeMap.erase(Ptr)) {
      SkipRemovedElements(FirstValidElement);
      return true;
    }
    return false;
  }

  /// Removes all elements and clears the collection.
  void clear() {
    NodeMap.clear();
    NodeList.clear();
    FirstValidElement = 0;
  }

  /// \returns an iterator that will iterate the elements in the order of
  /// insertion.
  iterator begin() {
    if (FirstValidElement == 0)
      SkipRemovedElements(FirstValidElement);
    return PhiNodeSetIterator(this, FirstValidElement);
  }

  /// \returns an iterator that points to the end of the collection.
  iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }

  /// Returns the number of elements in the collection.
  size_t size() const { return NodeMap.size(); }

  /// \returns 1 if the given element is in the collection, and 0 if otherwise.

private:
  /// Updates the CurrentIndex so that it will point to a valid element.
  ///
  /// If the element of NodeList at CurrentIndex is valid, it does not
  /// change it. If there are no more valid elements, it updates CurrentIndex
  /// to point to the end of the NodeList.
  void SkipRemovedElements(size_t &CurrentIndex) {
    while (CurrentIndex < NodeList.size()) {
      auto it = NodeMap.find(NodeList[CurrentIndex]);
      // If the element has been deleted and added again later, NodeMap will
      // point to a different index, so CurrentIndex will still be invalid.
      if (it != NodeMap.end() && it->second == CurrentIndex)
        break;
      ++CurrentIndex;
    }
  }
};
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
    : Set(Set), CurrentIndex(Start) {}

PHINode *PhiNodeSetIterator::operator*() const {
  assert(CurrentIndex < Set->NodeList.size() &&
         "PhiNodeSet access out of range");
  return Set->NodeList[CurrentIndex];
}

PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
  assert(CurrentIndex < Set->NodeList.size() &&
         "PhiNodeSet access out of range");
  ++CurrentIndex;
  Set->SkipRemovedElements(CurrentIndex);
  return *this;
}

bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
  return CurrentIndex == RHS.CurrentIndex;
}

bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
  return !((*this) == RHS);
}
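// Illustrative sketch (not upstream code): PhiNodeSet behaves like a set that
// also remembers insertion order, with O(1) erase. Assuming two PHINode
// pointers P1 and P2, and a hypothetical visit() helper:
//
//   PhiNodeSet Set;
//   Set.insert(P1);        // true, newly added
//   Set.insert(P2);        // true
//   Set.insert(P1);        // false, already present
//   Set.erase(P1);         // true; P2 keeps its insertion-order position
//   for (PHINode *P : Set)
//     visit(P);            // visits P2 only
//
// Erased slots stay in the underlying vector; the iterator skips them via
// SkipRemovedElements().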
3973/// Keep track of simplification of Phi nodes. 3974/// Accept the set of all phi nodes and erase phi node from this set 3975/// if it is simplified. 3976classSimplificationTracker {
3979// Tracks newly created Phi nodes. The elements are iterated by insertion 3981 PhiNodeSet AllPhiNodes;
3982// Tracks newly created Select nodes. 3990auto SV = Storage.
find(V);
3991if (SV == Storage.
end())
4001while (!WorkList.
empty()) {
4005if (
auto *PI = dyn_cast<Instruction>(
P))
4007for (
auto *U : PI->users())
4010 PI->replaceAllUsesWith(V);
4011if (
auto *
PHI = dyn_cast<PHINode>(PI))
4012 AllPhiNodes.erase(
PHI);
4013if (
auto *
Select = dyn_cast<SelectInst>(PI))
4015 PI->eraseFromParent();
4025while (OldReplacement !=
From) {
4027 To = dyn_cast<PHINode>(OldReplacement);
4028 OldReplacement = Get(
From);
4030assert(To && Get(To) == To &&
"Replacement PHI node is already replaced.");
4032From->replaceAllUsesWith(To);
4033 AllPhiNodes.erase(
From);
4034From->eraseFromParent();
4037 PhiNodeSet &newPhiNodes() {
return AllPhiNodes; }
4039void insertNewPhi(
PHINode *PN) { AllPhiNodes.insert(PN); }
4043unsigned countNewPhiNodes()
const{
return AllPhiNodes.size(); }
4045unsigned countNewSelectNodes()
const{
return AllSelectNodes.
size(); }
4047void destroyNewNodes(
Type *CommonType) {
4048// For safe erasing, replace the uses with dummy value first. 4050for (
auto *
I : AllPhiNodes) {
4051I->replaceAllUsesWith(Dummy);
4052I->eraseFromParent();
4054 AllPhiNodes.clear();
4055for (
auto *
I : AllSelectNodes) {
4056I->replaceAllUsesWith(Dummy);
4057I->eraseFromParent();
4059 AllSelectNodes.clear();
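// Illustrative note (not upstream code): the tracker records, for every
// placeholder PHI/select the combiner creates, the value it eventually
// simplifies to, and Get() follows that chain. For example, if a new phi %p
// is later found to be equivalent to an existing value %b, then after the
// corresponding ReplacePhi/Simplify step Get(%p) yields %b, and
// destroyNewNodes() can drop any placeholders that turned out to be
// unnecessary.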
/// A helper class for combining addressing modes.
class AddressingModeCombiner {
  typedef std::pair<PHINode *, PHINode *> PHIPair;

private:
  /// The addressing modes we've collected.

  /// The field in which the AddrModes differ, when we have more than one.
  ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;

  /// Are the AddrModes that we have all just equal to their original values?
  bool AllAddrModesTrivial = true;

  /// Common Type for all different fields in addressing modes.
  Type *CommonType = nullptr;

  /// SimplifyQuery for simplifyInstruction utility.

  /// Original Address.

  /// Common value among addresses
  Value *CommonValue = nullptr;

public:
  AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
      : SQ(_SQ), Original(OriginalValue) {}

  ~AddressingModeCombiner() { eraseCommonValueIfDead(); }

  /// Get the combined AddrMode

  /// Add a new AddrMode if it's compatible with the AddrModes we already
  /// have.
  /// \return True iff we succeeded in doing so.
  bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
    // Take note of if we have any non-trivial AddrModes, as we need to detect
    // when all AddrModes are trivial as then we would introduce a phi or select
    // which just duplicates what's already there.
    AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();

    // If this is the first addrmode then everything is fine.
    if (AddrModes.empty()) {
      AddrModes.emplace_back(NewAddrMode);
      return true;
    }

    // Figure out how different this is from the other address modes, which we
    // can do just by comparing against the first one given that we only care
    // about the cumulative difference.
    ExtAddrMode::FieldName ThisDifferentField =
        AddrModes[0].compare(NewAddrMode);
    if (DifferentField == ExtAddrMode::NoField)
      DifferentField = ThisDifferentField;
    else if (DifferentField != ThisDifferentField)
      DifferentField = ExtAddrMode::MultipleFields;

    // If NewAddrMode differs in more than one dimension we cannot handle it.
    bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;

    // If the Scale field is different then we reject.
    CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;

    // We also must reject the case when the base offset is different and the
    // scale reg is not null; we cannot handle this case because the merge of
    // different offsets would have to be used as the ScaleReg.
    CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||

    // We also must reject the case when GV is different and a BaseReg is
    // installed, because we want to use the base reg as a merge of GV values.
    CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
                              !NewAddrMode.HasBaseReg);
    // Even if NewAddrMode is the same, we still need to collect it, because
    // the original value is different. And later we will need all original
    // values as anchors when finding the common Phi node.

  /// Combine the addressing modes we've collected into a single
  /// addressing mode.
  /// \return True iff we successfully combined them or we only had one so
  /// didn't need to combine them anyway.
  bool combineAddrModes() {
    // If we have no AddrModes then they can't be combined.
    if (AddrModes.size() == 0)
      return false;

    // A single AddrMode can trivially be combined.
    if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
      return true;

    // If the AddrModes we collected are all just equal to the value they are
    // derived from then combining them wouldn't do anything useful.
    if (AllAddrModesTrivial)
      return false;

    if (!addrModeCombiningAllowed())
      return false;

    // Build a map between <original value, basic block where we saw it> to
    // value of base register.
    // Bail out if there is no common type.
    FoldAddrToValueMapping Map;
    if (!initializeMap(Map))
      return false;

    CommonValue = findCommon(Map);
    if (CommonValue)
      AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
    return CommonValue != nullptr;
  }
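  // Illustrative example (not from the upstream sources): combineAddrModes()
  // turns per-predecessor addressing modes that differ in a single field into
  // one mode anchored on a new common value. Roughly, for
  //
  //   bb1:
  //     %p1 = getelementptr i8, ptr %b1, i64 40
  //     br label %merge
  //   bb2:
  //     %p2 = getelementptr i8, ptr %b2, i64 40
  //     br label %merge
  //   merge:
  //     %p = phi ptr [ %p1, %bb1 ], [ %p2, %bb2 ]
  //     %v = load i32, ptr %p
  //
  // the two modes differ only in the base register (%b1 vs %b2), so a new
  //   %b = phi ptr [ %b1, %bb1 ], [ %b2, %bb2 ]
  // becomes the CommonValue and the load can fold [%b + 40] as its address.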
  /// `CommonValue` may be a placeholder inserted by us.
  /// If the placeholder is not used, we should remove this dead instruction.
  void eraseCommonValueIfDead() {
    if (CommonValue && CommonValue->getNumUses() == 0)
      if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
        CommonInst->eraseFromParent();
  }

  /// Initialize Map with anchor values. For every address seen
  /// we set the value of the differing field in this address.
  /// At the same time we find a common type for the differing field that we
  /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
  /// Return false if there is no common type found.
  bool initializeMap(FoldAddrToValueMapping &Map) {
    // Keep track of keys where the value is null. We will need to replace it
    // with constant null when we know the common type.
    for (auto &AM : AddrModes) {
      Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
      if (DV) {
        if (CommonType && CommonType != DV->getType())
          return false;
        CommonType = DV->getType();
      }
      Map[AM.OriginalValue] = DV;
    }
    assert(CommonType && "At least one non-null value must be!");
    for (auto *V : NullValue)
      Map[V] = Constant::getNullValue(CommonType);
    return true;
  }
  /// We have a mapping between value A and another value B where B was a field
  /// in the addressing mode represented by A. Also we have an original value C
  /// representing an address we start with. Traversing from C through phis and
  /// selects we ended up with A's in a map. This utility function tries to find
  /// a value V which is a field in addressing mode C such that, traversing
  /// through phi nodes and selects, we end up in the corresponding values B in
  /// the map. The utility will create new Phi/Selects if needed.
  // The simple example looks as follows:
  //   p = phi [p1, BB1], [p2, BB2]
  // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
  Value *findCommon(FoldAddrToValueMapping &Map) {
    // Tracks the simplification of newly created phi nodes. The reason we use
    // this mapping is because we will add new created Phi nodes in AddrToBase.
    // Simplification of Phi nodes is recursive, so some Phi node may
    // be simplified after we added it to AddrToBase. In reality this
    // simplification is possible only if original phi/selects were not
    // simplified yet.
    // Using this mapping we can find the current value in AddrToBase.
    SimplificationTracker ST(SQ);

    // First step, DFS to create PHI nodes for all intermediate blocks.
    // Also fill traverse order for the second step.
    InsertPlaceholders(Map, TraverseOrder, ST);

    // Second step, fill new nodes by merged values and simplify if possible.
    FillPlaceholders(Map, TraverseOrder, ST);

    if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
      ST.destroyNewNodes(CommonType);
      return nullptr;
    }

    // Now we'd like to match the new Phi nodes to existing ones.
    unsigned PhiNotMatchedCount = 0;
    if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
      ST.destroyNewNodes(CommonType);
      return nullptr;
    }

    NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
    NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
  }
4284 /// Try to match PHI node to Candidate. 4285 /// Matcher tracks the matched Phi nodes. 4288 PhiNodeSet &PhiNodesToMatch) {
4295while (!WorkList.
empty()) {
4297if (!Visited.
insert(Item).second)
4299// We iterate over all incoming values to Phi to compare them. 4300// If values are different and both of them Phi and the first one is a 4301// Phi we added (subject to match) and both of them is in the same basic 4302// block then we can match our pair if values match. So we state that 4303// these values match and add it to work list to verify that. 4304for (
auto *
B : Item.first->blocks()) {
4305Value *FirstValue = Item.first->getIncomingValueForBlock(
B);
4306Value *SecondValue = Item.second->getIncomingValueForBlock(
B);
4307if (FirstValue == SecondValue)
4310PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4311PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4313// One of them is not Phi or 4314// The first one is not Phi node from the set we'd like to match or 4315// Phi nodes from different basic blocks then 4316// we will not be able to match. 4317if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4321// If we already matched them then continue. 4322if (Matcher.
count({FirstPhi, SecondPhi}))
4324// So the values are different and does not match. So we need them to 4325// match. (But we register no more than one match per PHI node, so that 4326// we won't later try to replace them twice.) 4327if (MatchedPHIs.
insert(FirstPhi).second)
4328 Matcher.
insert({FirstPhi, SecondPhi});
4329// But me must check it. 4330 WorkList.
push_back({FirstPhi, SecondPhi});
  /// For the given set of PHI nodes (in the SimplificationTracker) try
  /// to find their equivalents.
  /// Returns false if this matching fails and creation of new Phi is disabled.
  bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
                   unsigned &PhiNotMatchedCount) {
    // Matched and PhiNodesToMatch iterate their elements in a deterministic
    // order, so the replacements (ReplacePhi) are also done in a deterministic
    // order.
    PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
    while (PhiNodesToMatch.size()) {
      // Add us, if no Phi nodes in the basic block we do not match.
      WillNotMatch.clear();

      // Traverse all Phis until we find an equivalent or fail to do so.
      bool IsMatched = false;
      for (auto &P : PHI->getParent()->phis()) {
        // Skip new Phi nodes.
        if (PhiNodesToMatch.count(&P))
          continue;
        if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
          break;
        // If it does not match, collect all Phi nodes from the matcher.
        // If we end up with no match, then all these Phi nodes will not match
        // later.
        for (auto M : Matched)
          WillNotMatch.insert(M.first);
        Matched.clear();
      }
      if (IsMatched) {
        // Replace all matched values and erase them.
        for (auto MV : Matched)
          ST.ReplacePhi(MV.first, MV.second);
        Matched.clear();
        continue;
      }
      // If we are not allowed to create new nodes then bail out.
      if (!AllowNewPhiNodes)
        return false;
      // Just remove all seen values in matcher. They will not match anything.
      PhiNotMatchedCount += WillNotMatch.size();
      for (auto *P : WillNotMatch)
        PhiNodesToMatch.erase(P);
    }
    return true;
  }
4386 /// Fill the placeholders with values from predecessors and simplify them. 4387void FillPlaceholders(FoldAddrToValueMapping &Map,
4389 SimplificationTracker &ST) {
4390while (!TraverseOrder.
empty()) {
4392assert(
Map.contains(Current) &&
"No node to fill!!!");
4396// CurrentValue also must be Select. 4397auto *CurrentSelect = cast<SelectInst>(Current);
4398auto *TrueValue = CurrentSelect->getTrueValue();
4399assert(
Map.contains(TrueValue) &&
"No True Value!");
4400Select->setTrueValue(
ST.Get(Map[TrueValue]));
4401auto *FalseValue = CurrentSelect->getFalseValue();
4402assert(
Map.contains(FalseValue) &&
"No False Value!");
4403Select->setFalseValue(
ST.Get(Map[FalseValue]));
4405// Must be a Phi node then. 4406auto *
PHI = cast<PHINode>(V);
4407// Fill the Phi node with values from predecessors. 4409Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(
B);
4410assert(
Map.contains(PV) &&
"No predecessor Value!");
4411PHI->addIncoming(
ST.Get(Map[PV]),
B);
4414Map[Current] =
ST.Simplify(V);
4418 /// Starting from original value recursively iterates over def-use chain up to 4419 /// known ending values represented in a map. For each traversed phi/select 4420 /// inserts a placeholder Phi or Select. 4421 /// Reports all new created Phi/Select nodes by adding them to set. 4422 /// Also reports and order in what values have been traversed. 4423void InsertPlaceholders(FoldAddrToValueMapping &Map,
4425 SimplificationTracker &ST) {
4427assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4428"Address must be a Phi or Select node");
4431while (!Worklist.
empty()) {
4433// if it is already visited or it is an ending value then skip it. 4434if (
Map.contains(Current))
4438// CurrentValue must be a Phi node or select. All others must be covered 4440if (
SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4441// Is it OK to get metadata from OrigSelect?! 4442// Create a Select placeholder with dummy value. 4445 CurrentSelect->getName(),
4446 CurrentSelect->getIterator(), CurrentSelect);
4449// We are interested in True and False values. 4450 Worklist.
push_back(CurrentSelect->getTrueValue());
4451 Worklist.
push_back(CurrentSelect->getFalseValue());
4453// It must be a Phi node then. 4454PHINode *CurrentPhi = cast<PHINode>(Current);
4459ST.insertNewPhi(
PHI);
4465bool addrModeCombiningAllowed() {
4468switch (DifferentField) {
4471case ExtAddrMode::BaseRegField:
4473case ExtAddrMode::BaseGVField:
4475case ExtAddrMode::BaseOffsField:
4477case ExtAddrMode::ScaledRegField:
4482}
} // end anonymous namespace

/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode. Just process that directly.
  if (Scale == 1)
    return matchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.

  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;

  // If the new address isn't legal, bail out.

  // It was legal, so commit it.

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode. If we found an available IV increment, do
  // not go any further: we can reuse it and cannot eliminate it.
  Value *AddLHS = nullptr;
  if (isa<Instruction>(ScaleReg) && // not a constant expr.
    TestAddrMode.InBounds = false;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
  // Restore status quo.

  // If this is an add recurrence with a constant step, return the increment
  // instruction and the canonicalized step.
  auto GetConstantStep =
      [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
    auto *PN = dyn_cast<PHINode>(V);

    // TODO: The result of the intrinsics above is two's complement. However,
    // when the IV inc is expressed as add or sub, iv.next is potentially a
    // poison value. If it has nuw or nsw flags, we need to make sure that these
    // flags are inferrable at the point of the memory instruction. Otherwise we
    // are replacing a well-defined two's-complement computation with poison.
    // Currently, to avoid the potentially complex analysis needed to prove
    // this, we reject such cases.
    if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
      if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
        return std::nullopt;
    if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
      return std::make_pair(IVInc->first, ConstantStep->getValue());
    return std::nullopt;
  };
  // Try to account for the following special case:
  // 1. ScaleReg is an inductive variable;
  // 2. We use it with non-zero offset;
  // 3. IV's increment is available at the point of memory instruction.
  //
  // In this case, we may reuse the IV increment instead of the IV Phi to
  // achieve the following advantages:
  // 1. If IV step matches the offset, we will have no need in the offset;
  // 2. Even if they don't match, we will reduce the overlap of living IV
  //    and IV increment, that will potentially lead to better register
  //    allocation.
  if (auto IVStep = GetConstantStep(ScaleReg)) {
    // The following assert is important to ensure a lack of infinite loops.
    // This transform is (intentionally) the inverse of the one just above.
    // If they don't agree on the definition of an increment, we'd alternate
    // back and forth indefinitely.
    APInt Step = IVStep->second;
    if (Offset.isSignedIntN(64)) {
      TestAddrMode.InBounds = false;
      TestAddrMode.BaseOffs -= Offset.getLimitedValue();
      // If this addressing mode is legal, commit it.
      // (Note that we defer the (expensive) domtree base legality check
      // to the very last possible point.)
      if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
          getDTFn().dominates(IVInc, MemoryInst)) {
        AddrModeInsts.push_back(cast<Instruction>(IVInc));
  // Restore status quo.

  // Otherwise, just return what we have.

/// This is a little filter, which returns true if an addressing computation
/// involving I might be folded into a load/store accessing it.
/// This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return I->getType()->isIntOrPtrTy();
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
/// Check whether or not \p Val is a legal instruction for \p TLI.
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
  Instruction *PromotedInst = dyn_cast<Instruction>(Val);

  // If the ISDOpcode is undefined, it was undefined before the promotion.

  // Otherwise, check if the promoted instruction is legal or not.

/// Helper class to perform type promotion.
class TypePromotionHelper {
4662 /// Utility function to add a promoted instruction \p ExtOpnd to 4663 /// \p PromotedInsts and record the type of extension we have seen. 4664staticvoid addPromotedInst(InstrToOrigTy &PromotedInsts,
4666 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4667 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4668if (It != PromotedInsts.end()) {
4669// If the new extension is same as original, the information in 4670// PromotedInsts[ExtOpnd] is still correct. 4671if (It->second.getInt() == ExtTy)
4674// Now the new extension is different from old extension, we make 4675// the type information invalid by setting extension type to 4677 ExtTy = BothExtension;
4679 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->
getType(), ExtTy);
4682 /// Utility function to query the original type of instruction \p Opnd 4683 /// with a matched extension type. If the extension doesn't match, we 4684 /// cannot use the information we had on the original type. 4685 /// BothExtension doesn't match any extension type. 4686staticconstType *getOrigType(
const InstrToOrigTy &PromotedInsts,
4688 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4689 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4690if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4691return It->second.getPointer();
4695 /// Utility function to check whether or not a sign or zero extension 4696 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by 4697 /// either using the operands of \p Inst or promoting \p Inst. 4698 /// The type of the extension is defined by \p IsSExt. 4699 /// In other words, check if: 4700 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. 4701 /// #1 Promotion applies: 4702 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). 4703 /// #2 Operand reuses: 4704 /// ext opnd1 to ConsideredExtType. 4705 /// \p PromotedInsts maps the instructions to their type before promotion. 4706staticbool canGetThrough(
constInstruction *Inst,
Type *ConsideredExtType,
4707const InstrToOrigTy &PromotedInsts,
bool IsSExt);
4709 /// Utility function to determine if \p OpIdx should be promoted when 4710 /// promoting \p Inst. 4711staticbool shouldExtOperand(
constInstruction *Inst,
int OpIdx) {
4712return !(isa<SelectInst>(Inst) && OpIdx == 0);
4715 /// Utility function to promote the operand of \p Ext when this 4716 /// operand is a promotable trunc or sext or zext. 4717 /// \p PromotedInsts maps the instructions to their type before promotion. 4718 /// \p CreatedInstsCost[out] contains the cost of all instructions 4719 /// created to promote the operand of Ext. 4720 /// Newly added extensions are inserted in \p Exts. 4721 /// Newly added truncates are inserted in \p Truncs. 4722 /// Should never be called directly. 4723 /// \return The promoted value which is used instead of Ext. 4724staticValue *promoteOperandForTruncAndAnyExt(
4726 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4730 /// Utility function to promote the operand of \p Ext when this 4731 /// operand is promotable and is not a supported trunc or sext. 4732 /// \p PromotedInsts maps the instructions to their type before promotion. 4733 /// \p CreatedInstsCost[out] contains the cost of all the instructions 4734 /// created to promote the operand of Ext. 4735 /// Newly added extensions are inserted in \p Exts. 4736 /// Newly added truncates are inserted in \p Truncs. 4737 /// Should never be called directly. 4738 /// \return The promoted value which is used instead of Ext. 4740 TypePromotionTransaction &TPT,
4741 InstrToOrigTy &PromotedInsts,
4742unsigned &CreatedInstsCost,
4747 /// \see promoteOperandForOther. 4748staticValue *signExtendOperandForOther(
4750 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4753return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4754 Exts, Truncs, TLI,
true);
4757 /// \see promoteOperandForOther. 4758staticValue *zeroExtendOperandForOther(
4760 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4763return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4764 Exts, Truncs, TLI,
false);
4768 /// Type for the utility function that promotes the operand of Ext. 4770 InstrToOrigTy &PromotedInsts,
4771unsigned &CreatedInstsCost,
4776 /// Given a sign/zero extend instruction \p Ext, return the appropriate 4777 /// action to promote the operand of \p Ext instead of using Ext. 4778 /// \return NULL if no promotable action is possible with the current 4780 /// \p InsertedInsts keeps track of all the instructions inserted by the 4781 /// other CodeGenPrepare optimizations. This information is important 4782 /// because we do not want to promote these instructions as CodeGenPrepare 4783 /// will reinsert them later. Thus creating an infinite loop: create/remove. 4784 /// \p PromotedInsts maps the instructions to their type before promotion. 4785static Action getAction(
Instruction *Ext,
const SetOfInstrs &InsertedInsts,
4787const InstrToOrigTy &PromotedInsts);
4790}
} // end anonymous namespace

bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
                                        Type *ConsideredExtType,
                                        const InstrToOrigTy &PromotedInsts,
                                        bool IsSExt) {
  // The promotion helper does not know how to deal with vector types yet.
  // To be able to fix that, we would need to fix the places where we
  // statically extend, e.g., constants and such.

  // We can always get through zext.
  if (isa<ZExtInst>(Inst))
    return true;

  // sext(sext) is ok too.
  if (IsSExt && isa<SExtInst>(Inst))
    return true;

  // We can get through a binary operator, if it is legal. In other words, the
  // binary operator must have a nuw or nsw flag.
  if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
    if (isa<OverflowingBinaryOperator>(BinOp) &&
        ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
         (IsSExt && BinOp->hasNoSignedWrap())))
      return true;

  // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
  if ((Inst->getOpcode() == Instruction::And ||

  // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
  if (Inst->getOpcode() == Instruction::Xor) {
    // Make sure it is not a NOT.
    if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
      if (!Cst->getValue().isAllOnes())
        return true;
  }

  // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
  // It may change a poisoned value into a regular value, like
  //   zext i32 (shrl i8 %val, 12)  -->  shrl i32 (zext i8 %val), 12
  //   poisoned value                    regular value
  // It should be OK since undef covers valid value.
  if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
    return true;

  // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
  // It may change a poisoned value into a regular value, like
  //   zext i32 (shl i8 %val, 12)  -->  shl i32 (zext i8 %val), 12
  //   poisoned value                   regular value
  // It should be OK since undef covers valid value.
  const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
  if (ExtInst->hasOneUse()) {
    const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
    if (AndInst && AndInst->getOpcode() == Instruction::And) {
      const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));

  // Check if we can do the following simplification.
  // ext(trunc(opnd)) --> ext(opnd)
  if (!isa<TruncInst>(Inst))
    return false;

  // Check if we can use this operand in the extension.
  // If the type is larger than the result type of the extension, we cannot.

  // If the operand of the truncate is not an instruction, we will not have
  // any information on the dropped bits.
  // (Actually we could for constants but it is not worth the extra logic.)
  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);

  // Check if the source of the type is narrow enough.
  // I.e., check that trunc just drops extended bits of the same kind of
  // extension.
  // #1 get the type of the operand and check the kind of the extended bits.
  const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
  // #2 check that the truncate just drops extended bits.

TypePromotionHelper::Action TypePromotionHelper::getAction(
    Instruction *Ext, const SetOfInstrs &InsertedInsts,
    const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
         "Unexpected instruction type");
  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
  Type *ExtTy = Ext->getType();
  bool IsSExt = isa<SExtInst>(Ext);
  // If the operand of the extension is not an instruction, we cannot
  // get through it.
  // If it is, check that we can get through.
  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
    return nullptr;

  // Do not promote if the operand has been added by codegenprepare.
  // Otherwise, it means we are undoing an optimization that is likely to be
  // redone, thus causing a potential infinite loop.
  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
    return nullptr;

  // SExt or Trunc instructions.
  // Return the related handler.
  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
      isa<ZExtInst>(ExtOpnd))
    return promoteOperandForTruncAndAnyExt;

  // Regular instruction.
  // Abort early if we will have to insert non-free instructions.
  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
}
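// Illustrative example (not from the upstream sources): for a pattern such as
//
//   %t = trunc i32 %x to i8
//   %s = sext i8 %t to i64
//
// getAction selects promoteOperandForTruncAndAnyExt, because the operand of
// the extension is a trunc; for a promotable arithmetic operand such as
//
//   %a = add nsw i8 %x, %y
//   %s = sext i8 %a to i64
//
// it selects signExtendOperandForOther instead.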
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
    Instruction *SExt, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
  // By construction, the operand of SExt is an instruction. Otherwise we cannot
  // get through it and this method should not be called.
  Value *ExtVal = SExt;
  bool HasMergedNonFreeExt = false;
  if (isa<ZExtInst>(SExtOpnd)) {
    // Replace s|zext(zext(opnd))
    // => zext(opnd).
    HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
    TPT.eraseInstruction(SExt);
  } else {
    // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
    // => z|sext(opnd).
    TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
  }
  CreatedInstsCost = 0;

  // Remove dead code.
  if (SExtOpnd->use_empty())
    TPT.eraseInstruction(SExtOpnd);

  // Check if the extension is still needed.
  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
  CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;

  // At this point we have: ext ty opnd to ty.
  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
  TPT.eraseInstruction(ExtInst, NextVal);
  return NextVal;
}
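// Illustrative example (not from the upstream sources): this handler folds an
// extension into the extension/truncate it looks through. For instance,
//
//   %z1 = zext i8 %x to i16
//   %z2 = zext i16 %z1 to i64
//
// becomes, roughly, once the transaction is committed,
//
//   %z2 = zext i8 %x to i64
//
// with the now-dead intermediate extension erased through the transaction so
// the whole step can still be rolled back if it turns out not to pay off.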
Value *TypePromotionHelper::promoteOperandForOther(
    Instruction *Ext, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
    bool IsSExt) {
  // By construction, the operand of Ext is an instruction. Otherwise we cannot
  // get through it and this method should not be called.
  Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
  CreatedInstsCost = 0;
  if (!ExtOpnd->hasOneUse()) {
    // ExtOpnd will be promoted.
    // All its uses, but Ext, will need to use a truncated value of the
    // promoted version.
    // Create the truncate now.
    Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
    if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
      // Insert it just after the definition.
      ITrunc->moveAfter(ExtOpnd);
      if (Truncs)
        Truncs->push_back(ITrunc);
    }

    TPT.replaceAllUsesWith(ExtOpnd, Trunc);
    // Restore the operand of Ext (which has been replaced by the previous call
    // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
    TPT.setOperand(Ext, 0, ExtOpnd);
  }

  // Get through the Instruction:
  // 1. Update its type.
  // 2. Replace the uses of Ext by Inst.
  // 3. Extend each operand that needs to be extended.

  // Remember the original type of the instruction before promotion.
  // This is useful to know that the high bits are sign extended bits.
  addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
  TPT.mutateType(ExtOpnd, Ext->getType());
  TPT.replaceAllUsesWith(Ext, ExtOpnd);
  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
       ++OpIdx) {
    Value *Opnd = ExtOpnd->getOperand(OpIdx);
    if (Opnd->getType() == Ext->getType() ||
        !shouldExtOperand(ExtOpnd, OpIdx)) {
      continue;
    }
    // Check if we can statically extend the operand.
    if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
      unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
      APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
                            : Cst->getValue().zext(BitWidth);
      TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
      continue;
    }
    // UndefValue are typed, so we have to statically sign extend them.
    if (isa<UndefValue>(Opnd)) {
      TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
      continue;
    }
    // Otherwise we have to explicitly sign extend the operand.
    Value *ValForExtOpnd = IsSExt
                               ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
                               : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
    TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
    Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
    if (!InstForExtOpnd)
      continue;

    if (Exts)
      Exts->push_back(InstForExtOpnd);

    CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
  }
  TPT.eraseInstruction(Ext);
  return ExtOpnd;
}
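
// Illustrative example (hypothetical IR): promoting the operand of
//   %a = add nsw i32 %b, 1
//   %e = sext i32 %a to i64
// mutates the add to i64, extends its other operands, and removes %e:
//   %pb = sext i32 %b to i64
//   %a  = add nsw i64 %pb, 1
// Any remaining i32 uses of the original %a are rewritten to use a trunc of
// the promoted value.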
/// Check whether or not promoting an instruction to a wider type is
/// profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
/// plus the number of instructions that have been
/// matched in the addressing mode during the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
bool AddressingModeMatcher::isPromotionProfitable(
    unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
  LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
                    << '\n');
  // The cost of the new extensions is greater than the cost of the
  // old extension plus what we folded.
  // This is not profitable.
  if (NewCost > OldCost)
    return false;
  if (NewCost < OldCost)
    return true;
  // The promotion is neutral but it may help folding the sign extension in
  // loads for instance.
  // Check that we did not create an illegal instruction.
  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}

/// Given an instruction or constant expr, see if we can fold the operation
/// into the addressing mode. If so, update the addressing mode and return
/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
/// computation because it has been moved away.
/// Thus AddrInst must not be added in the matched instructions.
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth,
                                               bool *MovedAway) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5)
    return false;

  // By default, all matched instructions stay in place.
  if (MovedAway)
    *MovedAway = false;

  switch (Opcode) {
  case Instruction::PtrToInt:
5111// PtrToInt is always a noop, as we know that the int type is pointer sized. 5113case Instruction::IntToPtr: {
5116// This inttoptr is a no-op if the integer type is pointer sized. 5121case Instruction::BitCast:
5122// BitCast is always a noop, and we can handle it as long as it is 5123// int->int or pointer->pointer (we don't want int<->fp or something). 5125// Don't touch identity bitcasts. These were probably put here by LSR, 5126// and we don't want to mess around with them. Assume it knows what it 5131case Instruction::AddrSpaceCast: {
5139case Instruction::Add: {
5140// Check to see if we can merge in one operand, then the other. If so, we 5143unsigned OldSize = AddrModeInsts.
size();
5144// Start a transaction at this point. 5145// The LHS may match but not the RHS. 5146// Therefore, we need a higher level restoration point to undo partially 5147// matched operation. 5148 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5149 TPT.getRestorationPoint();
    // Try to match an integer constant second to increase its chance of ending
    // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
    int First = 0, Second = 1;
    if (isa<ConstantInt>(AddrInst->getOperand(First)) &&
        !isa<ConstantInt>(AddrInst->getOperand(Second)))
      std::swap(First, Second);

    if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
        matchAddr(AddrInst->getOperand(Second), Depth + 1))
      return true;
5162// Restore the old addr mode info. 5164 AddrModeInsts.
resize(OldSize);
5165 TPT.rollback(LastKnownGood);
5167// Otherwise this was over-aggressive. Try merging operands in the opposite 5173// Otherwise we definitely can't merge the ADD in. 5175 AddrModeInsts.
resize(OldSize);
5176 TPT.rollback(LastKnownGood);
  // case Instruction::Or:
  //  TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS || RHS->getBitWidth() > 64)
      return false;
    int64_t Scale = Opcode == Instruction::Shl
                        ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
                        : RHS->getSExtValue();

    return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
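
  // Illustrative example (hypothetical IR): in
  //   %idx4 = shl i64 %idx, 2
  //   %p    = getelementptr i8, ptr %base, i64 %idx4
  // the shift is matched as Scale = 4 with ScaledReg = %idx, which targets
  // with [reg + reg*scale] addressing can fold into the memory operand.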
  case Instruction::GetElementPtr: {
    // Scan the GEP. We check whether it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5207 cast<ConstantInt>(AddrInst->
getOperand(i))->getZExtValue();
5212// The optimisations below currently only work for fixed offsets. 5217 dyn_cast<ConstantInt>(AddrInst->
getOperand(i))) {
5224// We only allow one variable index at the moment. 5225if (VariableOperand != -1)
5228// Remember the variable index. 5229 VariableOperand = i;
5235// A common case is for the GEP to only do a constant offset. In this case, 5236// just add it to the disp field and check validity. 5237if (VariableOperand == -1) {
5238AddrMode.BaseOffs += ConstantOffset;
5240if (!cast<GEPOperator>(AddrInst)->isInBounds())
5244AddrMode.BaseOffs -= ConstantOffset;
5248 ConstantOffset > 0) {
5249// Record GEPs with non-zero offsets as candidates for splitting in 5250// the event that the offset cannot fit into the r+i addressing mode. 5251// Simple and common case that only one GEP is used in calculating the 5252// address for the memory access. 5254auto *BaseI = dyn_cast<Instruction>(
Base);
5255auto *
GEP = cast<GetElementPtrInst>(AddrInst);
5256if (isa<Argument>(
Base) || isa<GlobalValue>(
Base) ||
5257 (BaseI && !isa<CastInst>(BaseI) &&
5258 !isa<GetElementPtrInst>(BaseI))) {
5259// Make sure the parent block allows inserting non-PHI instructions 5260// before the terminator. 5262 : &
GEP->getFunction()->getEntryBlock();
5264 LargeOffsetGEP = std::make_pair(
GEP, ConstantOffset);
5271// Save the valid addressing mode in case we can't match. 5273unsigned OldSize = AddrModeInsts.
size();
5275// See if the scale and offset amount is valid for this target. 5276AddrMode.BaseOffs += ConstantOffset;
5277if (!cast<GEPOperator>(AddrInst)->isInBounds())
5280// Match the base operand of the GEP. 5282// If it couldn't be matched, just stuff the value in a register. 5285 AddrModeInsts.
resize(OldSize);
5292// Match the remaining variable portion of the GEP. 5293if (!matchScaledValue(AddrInst->
getOperand(VariableOperand), VariableScale,
5295// If it couldn't be matched, try stuffing the base into a register 5296// instead of matching it, and retrying the match of the scale. 5298 AddrModeInsts.
resize(OldSize);
5303AddrMode.BaseOffs += ConstantOffset;
5304if (!matchScaledValue(AddrInst->
getOperand(VariableOperand),
5305 VariableScale,
Depth)) {
5306// If even that didn't work, bail. 5308 AddrModeInsts.
resize(OldSize);
5315case Instruction::SExt:
5316case Instruction::ZExt: {
5321// Try to move this ext out of the way of the addressing mode. 5322// Ask for a method for doing so. 5323 TypePromotionHelper::Action TPH =
5324 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5328 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5329 TPT.getRestorationPoint();
5330unsigned CreatedInstsCost = 0;
5332Value *PromotedOperand =
5333 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost,
nullptr,
nullptr, TLI);
5334// SExt has been moved away. 5335// Thus either it will be rematched later in the recursive calls or it is 5336// gone. Anyway, we must not fold it into the addressing mode at this point. 5340// addr = gep base, idx 5342// promotedOpnd = ext opnd <- no match here 5343// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 5344// addr = gep base, op <- match 5349"TypePromotionHelper should have filtered out those cases");
5352unsigned OldSize = AddrModeInsts.
size();
5354if (!matchAddr(PromotedOperand,
Depth) ||
5355// The total of the new cost is equal to the cost of the created 5357// The total of the old cost is equal to the cost of the extension plus 5358// what we have saved in the addressing mode. 5359 !isPromotionProfitable(CreatedInstsCost,
5360 ExtCost + (AddrModeInsts.
size() - OldSize),
5363 AddrModeInsts.
resize(OldSize);
5364LLVM_DEBUG(
dbgs() <<
"Sign extension does not pay off: rollback\n");
5365 TPT.rollback(LastKnownGood);
5370case Instruction::Call:
5372if (
II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5383/// If we can, try to add the value of 'Addr' into the current addressing mode. 5384/// If Addr can't be added to AddrMode this returns false and leaves AddrMode 5385/// unmodified. This assumes that Addr is either a pointer type or intptr_t 5388bool AddressingModeMatcher::matchAddr(
Value *
Addr,
unsignedDepth) {
5389// Start a transaction at this point that we will rollback if the matching 5391 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5392 TPT.getRestorationPoint();
5395// Fold in immediates if legal for the target. 5402// If this is a global variable, try to fold it into the addressing mode. 5411unsigned OldSize = AddrModeInsts.
size();
5413// Check to see if it is possible to fold this operation. 5414bool MovedAway =
false;
5415if (matchOperationAddr(
I,
I->getOpcode(),
Depth, &MovedAway)) {
5416// This instruction may have been moved away. If so, there is nothing 5420// Okay, it's possible to fold this. Check to see if it is actually 5421// *profitable* to do so. We use a simple cost model to avoid increasing 5422// register pressure too much. 5423if (
I->hasOneUse() ||
5424 isProfitableToFoldIntoAddressingMode(
I, BackupAddrMode,
AddrMode)) {
5429// It isn't profitable to do this, roll back. 5431 AddrModeInsts.
resize(OldSize);
5432 TPT.rollback(LastKnownGood);
5435if (matchOperationAddr(CE,
CE->getOpcode(),
Depth))
5437 TPT.rollback(LastKnownGood);
5438 }
  else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = nullptr;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = nullptr;
  }

  // Couldn't match.
  TPT.rollback(LastKnownGood);
  return false;
}
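
// Illustrative example (hypothetical IR): for an address computed as
//   %p = getelementptr i8, ptr %base, i64 16
// matchAddr records BaseReg = %base and BaseOffs = 16 when the target accepts
// a [reg + imm] form; if it does not, the matcher rolls back and falls back to
// plain [reg] addressing on %p itself.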
5468/// Check to see if all uses of OpVal by the specified inline asm call are due 5469/// to memory operands. If so, return true, otherwise return false. 5478// Compute the constraint code and ConstraintType to use. 5481// If this asm operand is our Value*, and if it isn't an indirect memory 5482// operand, we can't fold it! TODO: Also handle C_Address? 5483if (OpInfo.CallOperandVal == OpVal &&
5485 !OpInfo.isIndirect))
5492/// Recursively walk all the uses of I until we find a memory use. 5493/// If we find an obviously non-foldable instruction, return true. 5494/// Add accessed addresses and types to MemoryUses. 5500// If we already considered this instruction, we're done. 5501if (!ConsideredInsts.
insert(
I).second)
5504// If this is an obviously unfoldable instruction, bail out. 5508// Loop over all the uses, recursively processing them. 5509for (
Use &U :
I->uses()) {
5510// Conservatively return true if we're seeing a large number or a deep chain 5511// of users. This avoids excessive compilation times in pathological cases. 5515Instruction *UserI = cast<Instruction>(U.getUser());
5516if (
LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5517 MemoryUses.push_back({&U, LI->getType()});
5521if (
StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5523returntrue;
// Storing addr, not into addr. 5524 MemoryUses.push_back({&U, SI->getValueOperand()->
getType()});
5530returntrue;
// Storing addr, not into addr. 5531 MemoryUses.push_back({&U, RMW->getValOperand()->
getType()});
5537returntrue;
// Storing addr, not into addr. 5538 MemoryUses.push_back({&U, CmpX->getCompareOperand()->
getType()});
5542if (
CallInst *CI = dyn_cast<CallInst>(UserI)) {
5543if (CI->hasFnAttr(Attribute::Cold)) {
5544// If this is a cold call, we can sink the addressing calculation into 5545// the cold path. See optimizeCallInst 5550InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5554// If this is a memory operand, we're cool, otherwise bail out. 5561 PSI, BFI, SeenInsts))
5572unsigned SeenInsts = 0;
5575 PSI, BFI, SeenInsts);
5579/// Return true if Val is already known to be live at the use site that we're 5580/// folding it into. If so, there is no cost to include it in the addressing 5581/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the 5582/// instruction already. 5583bool AddressingModeMatcher::valueAlreadyLiveAtInst(
Value *Val,
5586// If Val is either of the known-live values, we know it is live! 5587if (Val ==
nullptr || Val == KnownLive1 || Val == KnownLive2)
5590// All values other than instructions and arguments (e.g. constants) are live. 5591if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5594// If Val is a constant sized alloca in the entry block, it is live, this is 5595// true because it is just a reference to the stack/frame pointer, which is 5596// live for the whole function. 5597if (
AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5601// Check to see if this value is already used in the memory instruction's 5602// block. If so, it's already live into the block at the very least, so we 5603// can reasonably fold it. 5607/// It is possible for the addressing mode of the machine to fold the specified 5608/// instruction into a load or store that ultimately uses it. 5609/// However, the specified instruction has multiple uses. 5610/// Given this, it may actually increase register pressure to fold it 5611/// into the load. For example, consider this code: 5615/// use(Y) -> nonload/store 5619/// In this case, Y has multiple uses, and can be folded into the load of Z 5620/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 5621/// be live at the use(Y) line. If we don't fold Y into load Z, we use one 5622/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 5623/// number of computations either. 5625/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 5626/// X was live across 'load Z' for other reasons, we actually *would* want to 5627/// fold the addressing mode in the Z case. This would make Y die earlier. 5628bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5630if (IgnoreProfitability)
5633// AMBefore is the addressing mode before this instruction was folded into it, 5634// and AMAfter is the addressing mode after the instruction was folded. Get 5635// the set of registers referenced by AMAfter and subtract out those 5636// referenced by AMBefore: this is the set of values which folding in this 5637// address extends the lifetime of. 5639// Note that there are only two potential values being referenced here, 5640// BaseReg and ScaleReg (global addresses are always available, as are any 5641// folded immediates). 5644// If the BaseReg or ScaledReg was referenced by the previous addrmode, their 5645// lifetime wasn't extended by adding this instruction. 5648if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.
BaseReg, AMBefore.
ScaledReg))
  // If folding this instruction (and its subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (!BaseReg && !ScaledReg)
    return true;
  // If all uses of this instruction can have the address mode sunk into them,
  // we can remove the addressing mode and effectively trade one live register
  // for another (at worst.) In this context, folding an addressing mode into
  // the use is just a particularly nice way of sinking it.
    return false; // Has a non-memory, non-foldable use!
const std::pair<Use *, Type *> &Pair : MemoryUses) {
5676Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5677Type *AddressAccessTy = Pair.second;
5678unsigned AS =
Address->getType()->getPointerAddressSpace();
5680// Do a match against the root of this address, ignoring profitability. This 5681// will tell us if the addressing mode for the memory operation will 5682// *actually* cover the shared instruction. 5684 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(
nullptr,
5686 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5687 TPT.getRestorationPoint();
5688 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI,
TRI, LI, getDTFn,
5689 AddressAccessTy, AS, UserI, Result,
5690 InsertedInsts, PromotedInsts, TPT,
5691 LargeOffsetGEP, OptSize, PSI, BFI);
5692 Matcher.IgnoreProfitability =
true;
5693boolSuccess = Matcher.matchAddr(Address, 0);
5697// The match was to check the profitability, the changes made are not 5698// part of the original matcher. Therefore, they should be dropped 5699// otherwise the original matcher will not present the right state. 5700 TPT.rollback(LastKnownGood);
5702// If the match didn't cover I, then it won't be shared by it. 5706 MatchedAddrModeInsts.
clear();
5712/// Return true if the specified values are defined in a 5713/// different basic block than BB. 5716returnI->getParent() != BB;
5720/// Sink addressing mode computation immediate before MemoryInst if doing so 5721/// can be done without increasing register pressure. The need for the 5722/// register pressure constraint means this can end up being an all or nothing 5723/// decision for all uses of the same addressing computation. 5725/// Load and Store Instructions often have addressing modes that can do 5726/// significant amounts of computation. As such, instruction selection will try 5727/// to get the load or store to do as much computation as possible for the 5728/// program. The problem is that isel can only see within a single block. As 5729/// such, we sink as much legal addressing mode work into the block as possible. 5731/// This method is used to optimize both load/store and inline asms with memory 5732/// operands. It's also used to sink addressing computations feeding into cold 5733/// call sites into their (cold) basic block. 5735/// The motivation for handling sinking into cold blocks is that doing so can 5736/// both enable other address mode sinking (by satisfying the register pressure 5737/// constraint above), and reduce register pressure globally (by removing the 5738/// addressing mode computation from the fast path entirely.). 5740Type *AccessTy,
unsigned AddrSpace) {
5743// Try to collapse single-value PHI nodes. This is necessary to undo 5744// unprofitable PRE transformations. 5749// Use a worklist to iteratively look through PHI and select nodes, and 5750// ensure that the addressing mode obtained from the non-PHI/select roots of 5751// the graph are compatible. 5752bool PhiOrSelectSeen =
false;
5755 AddressingModeCombiner AddrModes(SQ,
Addr);
5756 TypePromotionTransaction TPT(RemovedInsts);
5757 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5758 TPT.getRestorationPoint();
5759while (!worklist.
empty()) {
    // We allow traversing cyclic Phi nodes.
    // In case of success after this loop we ensure that traversing through
    // Phi nodes ends up with all cases to compute address of the form
    //    BaseGV + Base + Scale * Index + Offset
    // where Scale and Offset are constants and BaseGV, Base and Index
    // are exactly the same Values in all cases.
    // It means that BaseGV, Scale and Offset dominate our memory instruction
    // and have the same value as they had in address computation represented
    // as Phi. So we can safely sink address computation to memory instruction.
    if (!Visited.insert(V).second)
      continue;
5774// For a PHI node, push all of its incoming values. 5775if (
PHINode *
P = dyn_cast<PHINode>(V)) {
5777 PhiOrSelectSeen =
true;
5780// Similar for select. 5781if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
5784 PhiOrSelectSeen =
true;
5788// For non-PHIs, determine the addressing mode being computed. Note that 5789// the result may differ depending on what other uses our candidate 5790// addressing instructions might have. 5791 AddrModeInsts.
clear();
5792 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(
nullptr,
5794// Defer the query (and possible computation of) the dom tree to point of 5795// actual use. It's expected that most address matches don't actually need 5799return this->getDT(*
F);
5801ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5802 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5803 *
TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5808// If splitting the underlying data structure can reduce the offset of a 5809// GEP, collect the GEP. Skip the GEPs that are the new bases of 5810// previously split data structures. 5811 LargeOffsetGEPMap[
GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5812 LargeOffsetGEPID.
insert(std::make_pair(
GEP, LargeOffsetGEPID.
size()));
5815 NewAddrMode.OriginalValue =
V;
5816if (!AddrModes.addNewAddrMode(NewAddrMode))
5820// Try to combine the AddrModes we've collected. If we couldn't collect any, 5821// or we have multiple but either couldn't combine them or combining them 5822// wouldn't do anything useful, bail out now. 5823if (!AddrModes.combineAddrModes()) {
5824 TPT.rollback(LastKnownGood);
5829// Get the combined AddrMode (or the only AddrMode, if we only had one). 5832// If all the instructions matched are already in this BB, don't do anything. 5833// If we saw a Phi node then it is not local definitely, and if we saw a 5834// select then we want to push the address calculation past it even if it's 5835// already in this BB. 5836if (!PhiOrSelectSeen &&
none_of(AddrModeInsts, [&](
Value *V) {
5844// Insert this computation right after this user. Since our caller is 5845// scanning from the top of the BB to the bottom, reuse of the expr are 5846// guaranteed to happen later. 5849// Now that we determined the addressing expression we want to use and know 5850// that we have to sink it into this block. Check to see if we have already 5851// done this for some other load/store instr in this block. If so, reuse 5852// the computation. Before attempting reuse, check if the address is valid 5853// as it may have been erased. 5858Type *IntPtrTy =
DL->getIntPtrType(
Addr->getType());
5861 <<
" for " << *MemoryInst <<
"\n");
5864Addr->getType()->getPointerAddressSpace() &&
5865 !
DL->isNonIntegralPointerType(
Addr->getType())) {
5866// There are two reasons the address spaces might not match: a no-op 5867// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a 5868// ptrtoint/inttoptr pair to ensure we match the original semantics. 5869// TODO: allow bitcast between different address space pointers with the 5871 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy,
"sunkaddr");
5873 Builder.CreateIntToPtr(SunkAddr,
Addr->getType(),
"sunkaddr");
5875 SunkAddr = Builder.CreatePointerCast(SunkAddr,
Addr->getType());
5879// By default, we use the GEP-based method when AA is used later. This 5880// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 5882 <<
" for " << *MemoryInst <<
"\n");
5883Value *ResultPtr =
nullptr, *ResultIndex =
nullptr;
5885// First, find the pointer. 5892// We can't add more than one pointer together, nor can we scale a 5893// pointer (both of which seem meaningless). 5894if (ResultPtr ||
AddrMode.Scale != 1)
5901// It is only safe to sign extend the BaseReg if we know that the math 5902// required to create it did not overflow before we extend it. Since 5903// the original IR value was tossed in favor of a constant back when 5904// the AddrMode was created we need to bail out gracefully if widths 5905// do not match instead of extending it. 5907// (See below for code to add the scale.) 5916if (BaseGV !=
nullptr) {
5921 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5927// If the real base value actually came from an inttoptr, then the matcher 5928// will look through it and provide only the integer value. In that case, 5930if (!
DL->isNonIntegralPointerType(
Addr->getType())) {
5931if (!ResultPtr &&
AddrMode.BaseReg) {
5932 ResultPtr = Builder.CreateIntToPtr(
AddrMode.BaseReg,
Addr->getType(),
5935 }
elseif (!ResultPtr &&
AddrMode.Scale == 1) {
5936 ResultPtr = Builder.CreateIntToPtr(
AddrMode.ScaledReg,
Addr->getType(),
5945 }
elseif (!ResultPtr) {
5949 Builder.getPtrTy(
Addr->getType()->getPointerAddressSpace());
5951// Start with the base register. Do this first so that subsequent address 5952// matching finds it last, which will prevent it from trying to match it 5953// as the scaled value in case it happens to be a mul. That would be 5954// problematic if we've sunk a different mul for the scale, because then 5955// we'd end up sinking both muls. 5958if (
V->getType() != IntPtrTy)
5959V = Builder.CreateIntCast(V, IntPtrTy,
/*isSigned=*/true,
"sunkaddr");
5964// Add the scale value. 5967if (
V->getType() == IntPtrTy) {
5971 cast<IntegerType>(
V->getType())->getBitWidth() &&
5972"We can't transform if ScaledReg is too narrow");
5973V = Builder.CreateTrunc(V, IntPtrTy,
"sunkaddr");
5977V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
5980 ResultIndex = Builder.CreateAdd(ResultIndex, V,
"sunkaddr");
5985// Add in the Base Offset if present. 5989// We need to add this separately from the scale above to help with 5990// SDAG consecutive load/store merging. 5991if (ResultPtr->
getType() != I8PtrTy)
5992 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5993 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex,
"sunkaddr",
6001 SunkAddr = ResultPtr;
6003if (ResultPtr->
getType() != I8PtrTy)
6004 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6005 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex,
"sunkaddr",
6011Addr->getType()->getPointerAddressSpace() &&
6012 !
DL->isNonIntegralPointerType(
Addr->getType())) {
6013// There are two reasons the address spaces might not match: a no-op 6014// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a 6015// ptrtoint/inttoptr pair to ensure we match the original semantics. 6016// TODO: allow bitcast between different address space pointers with 6018 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy,
"sunkaddr");
6020 Builder.CreateIntToPtr(SunkAddr,
Addr->getType(),
"sunkaddr");
6022 SunkAddr = Builder.CreatePointerCast(SunkAddr,
Addr->getType());
6026// We'd require a ptrtoint/inttoptr down the line, which we can't do for 6027// non-integral pointers, so in that case bail out now. 6031PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6032if (
DL->isNonIntegralPointerType(
Addr->getType()) ||
6033 (BasePtrTy &&
DL->isNonIntegralPointerType(BasePtrTy)) ||
6034 (ScalePtrTy &&
DL->isNonIntegralPointerType(ScalePtrTy)) ||
6036DL->isNonIntegralPointerType(
AddrMode.BaseGV->getType())))
6040 <<
" for " << *MemoryInst <<
"\n");
6041Type *IntPtrTy =
DL->getIntPtrType(
Addr->getType());
6044// Start with the base register. Do this first so that subsequent address 6045// matching finds it last, which will prevent it from trying to match it 6046// as the scaled value in case it happens to be a mul. That would be 6047// problematic if we've sunk a different mul for the scale, because then 6048// we'd end up sinking both muls. 6051if (
V->getType()->isPointerTy())
6052V = Builder.CreatePtrToInt(V, IntPtrTy,
"sunkaddr");
6053if (
V->getType() != IntPtrTy)
6054V = Builder.CreateIntCast(V, IntPtrTy,
/*isSigned=*/true,
"sunkaddr");
6058// Add the scale value. 6061if (
V->getType() == IntPtrTy) {
6063 }
elseif (
V->getType()->isPointerTy()) {
6064V = Builder.CreatePtrToInt(V, IntPtrTy,
"sunkaddr");
6065 }
elseif (cast<IntegerType>(IntPtrTy)->
getBitWidth() <
6066 cast<IntegerType>(
V->getType())->getBitWidth()) {
6067V = Builder.CreateTrunc(V, IntPtrTy,
"sunkaddr");
6069// It is only safe to sign extend the BaseReg if we know that the math 6070// required to create it did not overflow before we extend it. Since 6071// the original IR value was tossed in favor of a constant back when 6072// the AddrMode was created we need to bail out gracefully if widths 6073// do not match instead of extending it. 6076I->eraseFromParent();
6080V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
6083Result = Builder.CreateAdd(Result, V,
"sunkaddr");
6088// Add in the BaseGV if present. 6090if (BaseGV !=
nullptr) {
6093 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6097Value *
V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy,
"sunkaddr");
6099Result = Builder.CreateAdd(Result, V,
"sunkaddr");
6104// Add in the Base Offset if present. 6108Result = Builder.CreateAdd(Result, V,
"sunkaddr");
6116 SunkAddr = Builder.CreateIntToPtr(Result,
Addr->getType(),
"sunkaddr");
6120// Store the newly computed address into the cache. In the case we reused a 6121// value, this should be idempotent. 6124// If we have no uses, recursively delete the value and all dead instructions 6127 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6128 RecursivelyDeleteTriviallyDeadInstructions(
6129 Repl, TLInfo, nullptr,
6130 [&](Value *V) { removeAllAssertingVHReferences(V); });
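
// Illustrative example (hypothetical IR): if %addr is defined in another block
// as
//   %addr = getelementptr i8, ptr %base, i64 40
// and is only used by a load in this block, an equivalent "sunkaddr"
// computation is materialized right before the load so SelectionDAG can fold
// it into the memory operand:
//   %sunkaddr = getelementptr i8, ptr %base, i64 40
//   %v = load i32, ptr %sunkaddr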
6137/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find 6138/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can 6139/// only handle a 2 operand GEP in the same basic block or a splat constant 6140/// vector. The 2 operands to the GEP must have a scalar pointer and a vector 6143/// If the existing GEP has a vector base pointer that is splat, we can look 6144/// through the splat to find the scalar pointer. If we can't find a scalar 6145/// pointer there's nothing we can do. 6147/// If we have a GEP with more than 2 indices where the middle indices are all 6148/// zeroes, we can replace it with 2 GEPs where the second has 2 operands. 6150/// If the final index isn't a vector or is a splat, we can emit a scalar GEP 6151/// followed by a GEP with an all zeroes vector index. This will enable 6152/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a 6154bool CodeGenPrepare::optimizeGatherScatterInst(
Instruction *MemoryInst,
6158if (
constauto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr)) {
6159// Don't optimize GEPs that don't have indices. 6160if (!
GEP->hasIndices())
6163// If the GEP and the gather/scatter aren't in the same BB, don't optimize. 6164// FIXME: We should support this by sinking the GEP. 6170bool RewriteGEP =
false;
6172if (Ops[0]->
getType()->isVectorTy()) {
6179unsigned FinalIndex = Ops.size() - 1;
6181// Ensure all but the last index is 0. 6182// FIXME: This isn't strictly required. All that's required is that they are 6183// all scalars or splats. 6184for (
unsigned i = 1; i < FinalIndex; ++i) {
6185auto *
C = dyn_cast<Constant>(Ops[i]);
6188if (isa<VectorType>(
C->getType()))
6189C =
C->getSplatValue();
6190auto *CI = dyn_cast_or_null<ConstantInt>(
C);
6193// Scalarize the index if needed. 6197// Try to scalarize the final index. 6198if (Ops[FinalIndex]->
getType()->isVectorTy()) {
6200auto *
C = dyn_cast<ConstantInt>(V);
6201// Don't scalarize all zeros vector. 6202if (!
C || !
C->isZero()) {
    // If we made any changes or we have extra operands, we need to generate
    // new instructions.
    if (!RewriteGEP && Ops.size() == 2)
      return false;
6214auto NumElts = cast<VectorType>(
Ptr->getType())->getElementCount();
6218Type *SourceTy =
GEP->getSourceElementType();
6219Type *ScalarIndexTy =
DL->getIndexType(Ops[0]->
getType()->getScalarType());
6221// If the final index isn't a vector, emit a scalar GEP containing all ops 6222// and a vector GEP with all zeroes final index. 6223if (!Ops[FinalIndex]->
getType()->isVectorTy()) {
6224 NewAddr = Builder.CreateGEP(SourceTy, Ops[0],
ArrayRef(Ops).drop_front());
6225auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6227 SourceTy,
ArrayRef(Ops).drop_front());
6234// Create a scalar GEP if there are more than 2 operands. 6235if (Ops.size() != 2) {
6236// Replace the last index with 0. 6241 SourceTy,
ArrayRef(Ops).drop_front());
6244// Now create the GEP with scalar pointer and vector index. 6245 NewAddr = Builder.CreateGEP(SourceTy,
Base, Index);
  } else if (!isa<Constant>(Ptr)) {
    // Not a GEP, maybe it's a splat and we can create a GEP to enable
    // SelectionDAGBuilder to use it as a uniform base.
    Value *V = getSplatValue(Ptr);
    if (!V)
      return false;

    auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6258// Emit a vector GEP with a scalar pointer and all 0s vector index. 6259Type *ScalarIndexTy =
DL->getIndexType(
V->getType()->getScalarType());
6260auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6263 Intrinsic::masked_gather) {
6267 Intrinsic::masked_scatter);
  } else {
    // Constant, SelectionDAGBuilder knows to check if it's a splat.
    return false;
  }

  MemoryInst->replaceUsesOfWith(Ptr, NewAddr);

  // If we have no uses, recursively delete the value and all dead instructions
  // using it.
  if (Ptr->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(
        Ptr, TLInfo, nullptr,
        [&](Value *V) { removeAllAssertingVHReferences(V); });

  return true;
}
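
// Illustrative example (hypothetical IR): a masked gather of i32 whose pointer
// operand is a splat of a single scalar pointer %p is rewritten to use a
// scalar base with an all-zero vector index,
//   %ptrs = getelementptr i32, ptr %p, <4 x i64> zeroinitializer
// which SelectionDAGBuilder can recognize as a uniform base for ISD::MGATHER.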
6288/// If there are any memory operands, use OptimizeMemoryInst to sink their 6289/// address computing into the block when possible / profitable. 6290bool CodeGenPrepare::optimizeInlineAsmInst(
CallInst *CS) {
6291bool MadeChange =
false;
6299// Compute the constraint code and ConstraintType to use. 6302// TODO: Also handle C_Address? 6304 OpInfo.isIndirect) {
6306 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->
getType(), ~0u);
6314/// Check if all the uses of \p Val are equivalent (or free) zero or 6319bool IsSExt = isa<SExtInst>(FirstUser);
6323if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6326// Same input and output types: Same instruction after CSE. 6330// If IsSExt is true, we are in this situation: 6332// b = sext ty1 a to ty2 6333// c = sext ty1 a to ty3 6334// Assuming ty2 is shorter than ty3, this could be turned into: 6336// b = sext ty1 a to ty2 6337// c = sext ty2 b to ty3 6338// However, the last sext is not free. 6342// This is a ZExt, maybe this is free to extend from one type to another. 6343// In that case, we would not account for a different use. 6358// All uses are the same or can be derived from one another for free. 6362/// Try to speculatively promote extensions in \p Exts and continue 6363/// promoting through newly promoted operands recursively as far as doing so is 6364/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. 6365/// When some promotion happened, \p TPT contains the proper state to revert 6368/// \return true if some promotion happened, false otherwise. 6369bool CodeGenPrepare::tryToPromoteExts(
6372unsigned CreatedInstsCost) {
6373bool Promoted =
false;
6375// Iterate over all the extensions to try to promote them. 6376for (
auto *
I : Exts) {
6377// Early check if we directly have ext(load). 6378if (isa<LoadInst>(
I->getOperand(0))) {
6383// Check whether or not we want to do any promotion. The reason we have 6384// this check inside the for loop is to catch the case where an extension 6385// is directly fed by a load because in such case the extension can be moved 6386// up without any promotion on its operands. 6390// Get the action to perform the promotion. 6391 TypePromotionHelper::Action TPH =
6392 TypePromotionHelper::getAction(
I, InsertedInsts, *TLI, PromotedInsts);
6393// Check if we can promote. 6395// Save the current extension as we cannot move up through its operand. 6400// Save the current state. 6401 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6402 TPT.getRestorationPoint();
6404unsigned NewCreatedInstsCost = 0;
6407Value *PromotedVal = TPH(
I, TPT, PromotedInsts, NewCreatedInstsCost,
6408 &NewExts,
nullptr, *TLI);
6410"TypePromotionHelper should have filtered out those cases");
6412// We would be able to merge only one extension in a load. 6413// Therefore, if we have more than 1 new extension we heuristically 6414// cut this search path, because it means we degrade the code quality. 6415// With exactly 2, the transformation is neutral, because we will merge 6416// one extension but leave one. However, we optimistically keep going, 6417// because the new extension may be removed too. Also avoid replacing a 6418// single free extension with multiple extensions, as this increases the 6419// number of IR instructions while not providing any savings. 6420longlong TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6421// FIXME: It would be possible to propagate a negative value instead of 6422// conservatively ceiling it to 0. 6423 TotalCreatedInstsCost =
6424 std::max((
longlong)0, (TotalCreatedInstsCost - ExtCost));
6426 (TotalCreatedInstsCost > 1 ||
6428 (ExtCost == 0 && NewExts.
size() > 1))) {
6429// This promotion is not profitable, rollback to the previous state, and 6430// save the current extension in ProfitablyMovedExts as the latest 6431// speculative promotion turned out to be unprofitable. 6432 TPT.rollback(LastKnownGood);
6436// Continue promoting NewExts as far as doing so is profitable. 6438 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6439bool NewPromoted =
false;
6440for (
auto *ExtInst : NewlyMovedExts) {
6441Instruction *MovedExt = cast<Instruction>(ExtInst);
6443// If we have reached to a load, we need this extra profitability check 6444// as it could potentially be merged into an ext(load). 6445if (isa<LoadInst>(ExtOperand) &&
6450 ProfitablyMovedExts.
push_back(MovedExt);
6454// If none of speculative promotions for NewExts is profitable, rollback 6455// and save the current extension (I) as the last profitable extension. 6457 TPT.rollback(LastKnownGood);
6461// The promotion is profitable. 6467/// Merging redundant sexts when one is dominating the other. 6468bool CodeGenPrepare::mergeSExts(
Function &
F) {
6470for (
auto &Entry : ValToSExtendedUses) {
6471 SExts &Insts =
Entry.second;
6474if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6477bool inserted =
false;
6478for (
auto &Pt : CurPts) {
6481 RemovedInsts.insert(Pt);
6482 Pt->removeFromParent();
6489// Give up if we need to merge in a common dominator as the 6490// experiments show it is not profitable. 6493 RemovedInsts.insert(Inst);
6500 CurPts.push_back(Inst);
// Splitting large data structures so that the GEPs accessing them can have
// smaller offsets so that they can be sunk to the same blocks as their users.
// For example, a large struct starting from %base is split into two parts
// where the second part starts from %new_base.
//
// Before:
// BB0:
//   %base     =
//
// BB1:
//   %gep0     = gep %base, off0
//   %gep1     = gep %base, off1
//   %gep2     = gep %base, off2
//
// BB2:
//   %load1    = load %gep0
//   %load2    = load %gep1
//   %load3    = load %gep2
//
// After:
// BB0:
//   %base     =
//   %new_base = gep %base, off0
//
// BB1:
//   %new_gep0 = %new_base
//   %new_gep1 = gep %new_base, off1 - off0
//   %new_gep2 = gep %new_base, off2 - off0
//
// BB2:
//   %load1    = load i32, i32* %new_gep0
//   %load2    = load i32, i32* %new_gep1
//   %load3    = load i32, i32* %new_gep2
//
// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
// their offsets are small enough to fit into the addressing mode.
bool CodeGenPrepare::splitLargeGEPOffsets() {
  bool Changed = false;
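  // Illustrative example (hypothetical target): assume the load/store
  // immediate offset field only covers [0, 4095]. GEPs at offsets 0 and 8192
  // off the same base cannot both fold their offsets, but after rebasing the
  // large-offset group on %new_base = gep %base, 8192 each access is again
  // within reach of the immediate form.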
6544for (
auto &Entry : LargeOffsetGEPMap) {
6547 &LargeOffsetGEPs =
Entry.second;
6548auto compareGEPOffset =
6549 [&](
const std::pair<GetElementPtrInst *, int64_t> &
LHS,
6550const std::pair<GetElementPtrInst *, int64_t> &
RHS) {
6551if (
LHS.first ==
RHS.first)
6553if (
LHS.second !=
RHS.second)
6554returnLHS.second <
RHS.second;
6555return LargeOffsetGEPID[
LHS.first] < LargeOffsetGEPID[
RHS.first];
6557// Sorting all the GEPs of the same data structures based on the offsets. 6558llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6560// Skip if all the GEPs have the same offsets. 6561if (LargeOffsetGEPs.
front().second == LargeOffsetGEPs.
back().second)
6564 int64_t BaseOffset = LargeOffsetGEPs.
begin()->second;
6565Value *NewBaseGEP =
nullptr;
6567auto createNewBase = [&](int64_t BaseOffset,
Value *OldBase,
6570Type *PtrIdxTy =
DL->getIndexType(
GEP->getType());
6572 PointerType::get(Ctx,
GEP->getType()->getPointerAddressSpace());
6576if (
auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6577// If the base of the struct is an instruction, the new base will be 6578// inserted close to it. 6580if (isa<PHINode>(BaseI))
6582elseif (
InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6584SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6587 NewBaseInsertPt = std::next(BaseI->getIterator());
6589// If the current base is an argument or global value, the new base 6590// will be inserted to the entry block. 6594IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6595// Create a new base. 6596Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6597 NewBaseGEP = OldBase;
6598if (NewBaseGEP->
getType() != I8PtrTy)
6599 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6601 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex,
"splitgep");
6602 NewGEPBases.
insert(NewBaseGEP);
    // Check whether all the offsets can be encoded with preferred common base.
    LargeOffsetGEPs.
front().second, LargeOffsetGEPs.
back().second)) {
6609 BaseOffset = PreferBase;
6610// Create a new base if the offset of the BaseGEP can be decoded with one 6612 createNewBase(BaseOffset, OldBase, BaseGEP);
6615auto *LargeOffsetGEP = LargeOffsetGEPs.
begin();
6616while (LargeOffsetGEP != LargeOffsetGEPs.
end()) {
6618 int64_t
Offset = LargeOffsetGEP->second;
6619if (
Offset != BaseOffset) {
6623// The result type of the GEP might not be the type of the memory 6626GEP->getResultElementType(),
6627GEP->getAddressSpace())) {
6628// We need to create a new base if the offset to the current base is 6629// too large to fit into the addressing mode. So, a very large struct 6630// may be split into several parts. 6633 NewBaseGEP =
nullptr;
6637// Generate a new GEP to replace the current one. 6638Type *PtrIdxTy =
DL->getIndexType(
GEP->getType());
6641// Create a new base if we don't have one yet. Find the insertion 6642// pointer for the new base first. 6643 createNewBase(BaseOffset, OldBase,
GEP);
6647Value *NewGEP = NewBaseGEP;
6648if (
Offset != BaseOffset) {
6649// Calculate the new offset for the new GEP. 6651 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6655 LargeOffsetGEP = LargeOffsetGEPs.
erase(LargeOffsetGEP);
6656GEP->eraseFromParent();
bool CodeGenPrepare::optimizePhiType(
    PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
    SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
  // We are looking for a collection of interconnected phi nodes that together
  // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
  // are of the same type. Convert the whole set of nodes to the type of the
  // bitcast.
  Type *PhiTy = I->getType();
  Type *ConvertTy = nullptr;
  if (Visited.count(I) ||
      (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
    return false;
  // This works by adding extra bitcasts between load/stores and removing
  // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
  // we can get in the situation where we remove a bitcast in one iteration
  // just to add it again in the next. We need to ensure that at least one
  // bitcast we remove is anchored to something that will not change back.
  bool AnyAnchored =
false;
6691while (!Worklist.
empty()) {
6694if (
auto *Phi = dyn_cast<PHINode>(
II)) {
6695// Handle Defs, which might also be PHI's 6696for (
Value *V :
Phi->incoming_values()) {
6697if (
auto *OpPhi = dyn_cast<PHINode>(V)) {
6698if (!PhiNodes.
count(OpPhi)) {
6699if (!Visited.
insert(OpPhi).second)
6704 }
elseif (
auto *OpLoad = dyn_cast<LoadInst>(V)) {
6705if (!OpLoad->isSimple())
6707if (Defs.
insert(OpLoad).second)
6709 }
elseif (
auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6710if (Defs.
insert(OpEx).second)
6712 }
elseif (
auto *OpBC = dyn_cast<BitCastInst>(V)) {
6714 ConvertTy = OpBC->getOperand(0)->getType();
6715if (OpBC->getOperand(0)->getType() != ConvertTy)
6717if (Defs.
insert(OpBC).second) {
6719 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6720 !isa<ExtractElementInst>(OpBC->getOperand(0));
6722 }
elseif (
auto *OpC = dyn_cast<ConstantData>(V))
6729// Handle uses which might also be phi's 6730for (
User *V :
II->users()) {
6731if (
auto *OpPhi = dyn_cast<PHINode>(V)) {
6732if (!PhiNodes.
count(OpPhi)) {
6733if (Visited.
count(OpPhi))
6739 }
elseif (
auto *OpStore = dyn_cast<StoreInst>(V)) {
6740if (!OpStore->isSimple() || OpStore->getOperand(0) !=
II)
6742Uses.insert(OpStore);
6743 }
elseif (
auto *OpBC = dyn_cast<BitCastInst>(V)) {
6745 ConvertTy = OpBC->getType();
6746if (OpBC->getType() != ConvertTy)
6750any_of(OpBC->users(), [](
User *U) { return !isa<StoreInst>(U); });
6757if (!ConvertTy || !AnyAnchored ||
6762 << *ConvertTy <<
"\n");
6764// Create all the new phi nodes of the new type, and bitcast any loads to the 6770if (isa<BitCastInst>(
D)) {
6771 ValMap[
D] =
D->getOperand(0);
6775 ValMap[
D] =
newBitCastInst(
D, ConvertTy,
D->getName() +
".bc", insertPt);
6780Phi->getName() +
".tc",
Phi->getIterator());
6781// Pipe together all the PhiNodes. 6782for (
PHINode *Phi : PhiNodes) {
6783PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6784for (
int i = 0, e =
Phi->getNumIncomingValues(); i < e; i++)
6786Phi->getIncomingBlock(i));
6789// And finally pipe up the stores and bitcasts 6791if (isa<BitCastInst>(U)) {
6795U->setOperand(0,
newBitCastInst(ValMap[
U->getOperand(0)], PhiTy,
"bc",
6800// Save the removed phis to be deleted later. 6802 DeletedInstrs.
insert(Phi);
6806bool CodeGenPrepare::optimizePhiTypes(
Function &
F) {
6814// Attempt to optimize all the phis in the functions to the correct type. 6816for (
auto &Phi : BB.
phis())
6817 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6819// Remove any old phi's that have been converted. 6820for (
auto *
I : DeletedInstrs) {
6822I->eraseFromParent();
6828/// Return true, if an ext(load) can be formed from an extension in 6830bool CodeGenPrepare::canFormExtLd(
6833for (
auto *MovedExtInst : MovedExts) {
6834if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6835 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6836 Inst = MovedExtInst;
6843// If they're already in the same block, there's nothing to do. 6844// Make the cheap checks first if we did not promote. 6845// If we promoted, we need to check if it is indeed profitable. 6852/// Move a zext or sext fed by a load into the same basic block as the load, 6853/// unless conditions are unfavorable. This allows SelectionDAG to fold the 6854/// extend into the load. 6858/// %ld = load i32* %addr 6859/// %add = add nuw i32 %ld, 4 6860/// %zext = zext i32 %add to i64 6864/// %ld = load i32* %addr 6865/// %zext = zext i32 %ld to i64 6866/// %add = add nuw i64 %zext, 4 6868/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which 6869/// allow us to match zext(load i32*) to i64. 6871/// Also, try to promote the computations used to obtain a sign extended 6872/// value used into memory accesses. 6875/// a = add nsw i32 b, 3 6876/// d = sext i32 a to i64 6877/// e = getelementptr ..., i64 d 6881/// f = sext i32 b to i64 6882/// a = add nsw i64 f, 3 6883/// e = getelementptr ..., i64 a 6886/// \p Inst[in/out] the extension may be modified during the process if some 6887/// promotions apply. 6888bool CodeGenPrepare::optimizeExt(
Instruction *&Inst) {
6889bool AllowPromotionWithoutCommonHeader =
false;
6890 /// See if it is an interesting sext operations for the address type 6891 /// promotion before trying to promote it, e.g., the ones with the right 6892 /// type and used in memory accesses. 6894 *Inst, AllowPromotionWithoutCommonHeader);
6895 TypePromotionTransaction TPT(RemovedInsts);
6896 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6897 TPT.getRestorationPoint();
6902bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6904// Look for a load being extended. 6908// Try to promote a chain of computation if it allows to form an extended 6910if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6911assert(LI && ExtFedByLoad &&
"Expect a valid load and extension");
6913// Move the extend into the same block as the load. 6916 Inst = ExtFedByLoad;
6920// Continue promoting SExts if known as considerable depending on targets. 6921if (ATPConsiderable &&
6922 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6923 HasPromoted, TPT, SpeculativelyMovedExts))
6926 TPT.rollback(LastKnownGood);
6930// Perform address type promotion if doing so is profitable. 6931// If AllowPromotionWithoutCommonHeader == false, we should find other sext 6932// instructions that sign extended the same initial value. However, if 6933// AllowPromotionWithoutCommonHeader == true, we expect promoting the 6934// extension is just profitable. 6935bool CodeGenPrepare::performAddressTypePromotion(
6936Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
6937bool HasPromoted, TypePromotionTransaction &TPT,
6939bool Promoted =
false;
6941bool AllSeenFirst =
true;
6942for (
auto *
I : SpeculativelyMovedExts) {
6943Value *HeadOfChain =
I->getOperand(0);
6945 SeenChainsForSExt.
find(HeadOfChain);
6946// If there is an unhandled SExt which has the same header, try to promote 6948if (AlreadySeen != SeenChainsForSExt.
end()) {
6949if (AlreadySeen->second !=
nullptr)
6950 UnhandledExts.
insert(AlreadySeen->second);
6951 AllSeenFirst =
false;
6955if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6956 SpeculativelyMovedExts.size() == 1)) {
6960for (
auto *
I : SpeculativelyMovedExts) {
6961Value *HeadOfChain =
I->getOperand(0);
6962 SeenChainsForSExt[HeadOfChain] =
nullptr;
6963 ValToSExtendedUses[HeadOfChain].push_back(
I);
6965// Update Inst as promotion happen. 6966 Inst = SpeculativelyMovedExts.pop_back_val();
6968// This is the first chain visited from the header, keep the current chain 6969// as unhandled. Defer to promote this until we encounter another SExt 6970// chain derived from the same header. 6971for (
auto *
I : SpeculativelyMovedExts) {
6972Value *HeadOfChain =
I->getOperand(0);
6973 SeenChainsForSExt[HeadOfChain] = Inst;
6978if (!AllSeenFirst && !UnhandledExts.
empty())
6979for (
auto *VisitedSExt : UnhandledExts) {
6980if (RemovedInsts.count(VisitedSExt))
6982 TypePromotionTransaction TPT(RemovedInsts);
6986bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6990for (
auto *
I : Chains) {
6991Value *HeadOfChain =
I->getOperand(0);
6992// Mark this as handled. 6993 SeenChainsForSExt[HeadOfChain] =
nullptr;
6994 ValToSExtendedUses[HeadOfChain].push_back(
I);
7003// If the result of a {s|z}ext and its source are both live out, rewrite all 7004// other uses of the source with result of extension. 7005Value *Src =
I->getOperand(0);
7006if (Src->hasOneUse())
7009// Only do this xform if truncating is free. 7013// Only safe to perform the optimization if the source is also defined in 7015if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->
getParent())
7018bool DefIsLiveOut =
false;
7019for (
User *U :
I->users()) {
7022// Figure out which BB this ext is used in. 7032// Make sure none of the uses are PHI nodes. 7033for (
User *U : Src->users()) {
7038// Be conservative. We don't want this xform to end up introducing 7039// reloads just before load / store instructions. 7040if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7044// InsertedTruncs - Only insert one trunc in each block once. 7047bool MadeChange =
false;
7048for (
Use &U : Src->uses()) {
7051// Figure out which BB this ext is used in. 7056// Both src and def are live in this block. Rewrite the use. 7057Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7059if (!InsertedTrunc) {
7062 InsertedTrunc =
newTruncInst(
I, Src->getType(),
"");
7064 InsertedInsts.insert(InsertedTrunc);
7067// Replace a use of the {s|z}ext source with a use of the result. 7076// Find loads whose uses only use some of the loaded value's bits. Add an "and" 7077// just after the load if the target can fold this into one extload instruction, 7078// with the hope of eliminating some of the other later "and" instructions using 7079// the loaded value. "and"s that are made trivially redundant by the insertion 7080// of the new "and" are removed by this function, while others (e.g. those whose 7081// path from the load goes through a phi) are left for isel to potentially 7114// becomes (after a call to optimizeLoadExt for each load): 7118// x1' = and x1, 0xff 7122// x2' = and x2, 0xff 7127bool CodeGenPrepare::optimizeLoadExt(
LoadInst *Load) {
7128if (!
Load->isSimple() || !
Load->getType()->isIntOrPtrTy())
7131// Skip loads we've already transformed. 7132if (
Load->hasOneUse() &&
7133 InsertedInsts.count(cast<Instruction>(*
Load->user_begin())))
7136// Look at all uses of Load, looking through phis, to determine how many bits 7137// of the loaded value are needed. 7142for (
auto *U :
Load->users())
7143 WorkList.
push_back(cast<Instruction>(U));
7147// If the BitWidth is 0, do not try to optimize the type 7154while (!WorkList.
empty()) {
7157// Break use-def graph loops. 7161// For a PHI node, push all of its users. 7162if (
auto *Phi = dyn_cast<PHINode>(
I)) {
7163for (
auto *U :
Phi->users())
7164 WorkList.
push_back(cast<Instruction>(U));
7168switch (
I->getOpcode()) {
7169case Instruction::And: {
7170auto *AndC = dyn_cast<ConstantInt>(
I->getOperand(1));
7173APInt AndBits = AndC->getValue();
7174 DemandBits |= AndBits;
7175// Keep track of the widest and mask we see. 7176if (AndBits.
ugt(WidestAndBits))
7177 WidestAndBits = AndBits;
7178if (AndBits == WidestAndBits &&
I->getOperand(0) == Load)
7183case Instruction::Shl: {
7184auto *ShlC = dyn_cast<ConstantInt>(
I->getOperand(1));
7188 DemandBits.setLowBits(
BitWidth - ShiftAmt);
7193case Instruction::Trunc: {
7196 DemandBits.setLowBits(TruncBitWidth);
7206uint32_t ActiveBits = DemandBits.getActiveBits();
7207// Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the 7208// target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, 7209// for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but 7210// (and (load x) 1) is not matched as a single instruction, rather as a LDR 7211// followed by an AND. 7212// TODO: Look into removing this restriction by fixing backends to either 7213// return false for isLoadExtLegal for i1 or have them select this pattern to 7214// a single instruction. 7216// Also avoid hoisting if we didn't see any ands with the exact DemandBits 7217// mask, since these are the only ands that will be removed by isel. 7218if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7219 WidestAndBits != DemandBits)
7226// Reject cases that won't be matched as extloads. 7232auto *NewAnd = cast<Instruction>(
7233 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7234// Mark this instruction as "inserted by CGP", so that other 7235// optimizations don't touch it. 7236 InsertedInsts.insert(NewAnd);
7238// Replace all uses of load with new and (except for the use of load in the 7241 NewAnd->setOperand(0, Load);
7243// Remove any and instructions that are now redundant. 7244for (
auto *
And : AndsToMaybeRemove)
7245// Check that the and mask is the same as the one we decided to put on the 7247if (cast<ConstantInt>(
And->getOperand(1))->getValue() == DemandBits) {
7249if (&*CurInstIterator ==
And)
7250 CurInstIterator = std::next(
And->getIterator());
7251And->eraseFromParent();
  // NSW flags may no longer hold.
  for (
auto *Inst : DropFlags)
/// Check if V (an operand of a select instruction) is an expensive instruction
/// that is only used once.
  auto *I = dyn_cast<Instruction>(V);

  // If it's safe to speculatively execute, then it should not have side
  // effects; therefore, it's safe to sink and possibly *not* execute.

/// Returns true if a SelectInst should be turned into an explicit branch.

  // If even a predictable select is cheap, then a branch can't be cheaper.

  // FIXME: This should use the same heuristics as IfConversion to determine
  // whether a select is better represented as a branch.

  // If metadata tells us that the select condition is obviously predictable,
  // then we want to replace the select with a branch.
  uint64_t Max = std::max(TrueWeight, FalseWeight);
  uint64_t Sum = TrueWeight + FalseWeight;

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

  // If a branch is predictable, an out-of-order CPU can avoid blocking on its
  // comparison condition. If the compare has more than one use, there's
  // probably another cmov or setcc around, so it's not worth emitting a branch.
  if (!Cmp || !Cmp->hasOneUse())

  // If either operand of the select is expensive and only needed on one side
  // of the select, we should form a branch.

/// If \p isTrue is true, return the true value of \p SI, otherwise return
/// false value of \p SI. If the true/false value of \p SI is defined by any
/// select instructions in \p Selects, look through the defining select
/// instruction until the true/false value is not defined in \p Selects.
         DefSI = dyn_cast<SelectInst>(V)) {
    assert(DefSI->getCondition() == SI->getCondition() &&
           "The condition of DefSI does not match with SI");
    V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());

  assert(V && "Failed to get select true/false value");
  // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
  // general vector shifts, and (3) the shift amount is a select-of-splatted
  // values, hoist the shifts before the select:
  //   shift Op0, (select Cond, TVal, FVal) -->
  //   select Cond, (shift Op0, TVal), (shift Op0, FVal)
  //
  // This is inverting a generic IR transform when we know that the cost of a
  // general vector shift is more than the cost of 2 shift-by-scalars.
  // We can't do this effectively in SDAG because we may not be able to
  // determine if the select operands are splats from within a basic block.
  Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
  Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
  Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
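  // Illustrative IR for the hoist above (a sketch; value names are made up):
  //
  //   %amt = select i1 %cond, <4 x i32> splat (i32 2), <4 x i32> splat (i32 3)
  //   %r   = shl <4 x i32> %x, %amt
  //
  // becomes
  //
  //   %s2 = shl <4 x i32> %x, splat (i32 2)
  //   %s3 = shl <4 x i32> %x, splat (i32 3)
  //   %r  = select i1 %cond, <4 x i32> %s2, <4 x i32> %s3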
bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
  assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
         "Expected a funnel shift");

  // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
  // than general vector shifts, and (3) the shift amount is select-of-splatted
  // values, hoist the funnel shifts before the select:
  //   fsh Op0, Op1, (select Cond, TVal, FVal) -->
  //   select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
  //
  // This is inverting a generic IR transform when we know that the cost of a
  // general vector shift is more than the cost of 2 shift-by-scalars.
  // We can't do this effectively in SDAG because we may not be able to
  // determine if the select operands are splats from within a basic block.
  Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
  Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
  Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
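  // Same idea in intrinsic form (a sketch; value names are made up):
  //
  //   %amt = select i1 %c, <2 x i32> splat (i32 8), <2 x i32> splat (i32 24)
  //   %r   = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y,
  //                                          <2 x i32> %amt)
  //
  // becomes two fshl calls with constant splat amounts, selected on %c.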
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
  // If the SelectOptimize pass is enabled, selects have already been optimized.

  // Find all consecutive select instructions that share the same condition.
       It != SI->getParent()->end(); ++It) {
    if (I && SI->getCondition() == I->getCondition()) {

  // Increment the current iterator to skip all the rest of select instructions
  // because they will be either "not lowered" or "all lowered" to branch.
  CurInstIterator = std::next(LastSI->getIterator());
  // Examine debug-info attached to the consecutive select instructions. They
  // won't be individually optimised by optimizeInst, so we need to perform
  // DbgVariableRecord maintenance here instead.
    fixupDbgVariableRecordsOnInst(*SI);

  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);

  // Can we convert the 'select' to CF ?
  if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))

  if (SI->getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
    SelectKind = TargetLowering::ScalarValSelect;
  // The DominatorTree needs to be rebuilt by any consumers after this
  // transformation. We simply reset here rather than setting the ModifiedDT
  // flag to avoid restarting the function walk in runOnFunction for each
  // select optimized.

  // Transform a sequence like this:
  //    %cmp = cmp uge i32 %a, %b
  //    %sel = select i1 %cmp, i32 %c, i32 %d
  //
  // Into:
  //    %cmp = cmp uge i32 %a, %b
  //    %cmp.frozen = freeze %cmp
  //    br i1 %cmp.frozen, label %select.true, label %select.false
  //  select.true:
  //    br label %select.end
  //  select.false:
  //    br label %select.end
  //  select.end:
  //    %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
  //
  // %cmp should be frozen, otherwise it may introduce undefined behavior.
  // In addition, we may sink instructions that produce %c or %d from
  // the entry block into the destination(s) of the new branch.
  // If the true or false blocks do not contain a sunken instruction, that
  // block and its branch may be optimized away. In that case, one side of the
  // first branch will point directly to select.end, and the corresponding PHI
  // predecessor block will be the start block.

  // Collect values that go on the true side and the values that go on the
  // false side.
    TrueInstrs.push_back(cast<Instruction>(V));

    FalseInstrs.push_back(cast<Instruction>(V));
7493// Split the select block, according to how many (if any) values go on each 7497// We should split before any debug-info. 7498 SplitPt.setHeadBit(
true);
7501auto *CondFr =
IB.CreateFreeze(
SI->getCondition(),
SI->getName() +
".frozen");
7508if (TrueInstrs.
size() == 0) {
7510 CondFr, SplitPt,
false,
nullptr,
nullptr, LI));
7512 EndBlock = cast<BasicBlock>(FalseBranch->
getOperand(0));
7513 }
elseif (FalseInstrs.
size() == 0) {
7515 CondFr, SplitPt,
false,
nullptr,
nullptr, LI));
7517 EndBlock = cast<BasicBlock>(TrueBranch->
getOperand(0));
7522nullptr,
nullptr, LI);
7523 TrueBranch = cast<BranchInst>(ThenTerm);
7524 FalseBranch = cast<BranchInst>(ElseTerm);
7527 EndBlock = cast<BasicBlock>(TrueBranch->
getOperand(0));
7530 EndBlock->
setName(
"select.end");
7532 TrueBlock->
setName(
"select.true.sink");
7534 FalseBlock->
setName(FalseInstrs.
size() == 0 ?
"select.false" 7535 :
"select.false.sink");
7539 FreshBBs.
insert(TrueBlock);
7541 FreshBBs.
insert(FalseBlock);
7542 FreshBBs.
insert(EndBlock);
7545BFI->setBlockFreq(EndBlock,
BFI->getBlockFreq(StartBlock));
7547staticconstunsigned MD[] = {
7548 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7549 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7552// Sink expensive instructions into the conditional blocks to avoid executing 7553// them speculatively. 7559// If we did not create a new block for one of the 'true' or 'false' paths 7560// of the condition, it means that side of the branch goes to the end block 7561// directly and the path originates from the start block from the point of 7562// view of the new PHI. 7563if (TrueBlock ==
nullptr)
7564 TrueBlock = StartBlock;
7565elseif (FalseBlock ==
nullptr)
7566 FalseBlock = StartBlock;
7569 INS.
insert(ASI.begin(), ASI.end());
7570// Use reverse iterator because later select may use the value of the 7571// earlier select, and we need to propagate value through earlier select 7572// to get the PHI operand. 7574// The select itself is replaced with a PHI Node. 7583SI->eraseFromParent();
7585 ++NumSelectsExpanded;
7588// Instruct OptimizeBlock to skip to the next block. 7589 CurInstIterator = StartBlock->
end();
/// Some targets only accept certain types for splat inputs. For example a VDUP
/// in MVE takes a GPR (integer) register, and the instructions that incorporate
/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.

  // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only

  auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
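  // An example of the accepted pattern (illustrative only):
  //
  //   %ins   = insertelement <4 x float> poison, float %val, i64 0
  //   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
  //                          <4 x i32> zeroinitializer
  //
  // which is rebuilt below as bitcast -> splat -> bitcast so the splatted
  // value can live in a GPR.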
7608"Expected a type of the same size!");
7612// Create a bitcast (shuffle (insert (bitcast(..)))) 7614 Builder.SetInsertPoint(SVI);
7615Value *BC1 = Builder.CreateBitCast(
7616 cast<Instruction>(SVI->
getOperand(0))->getOperand(1), NewType);
7617Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7618Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7622 SVI, TLInfo,
nullptr,
7623 [&](
Value *V) { removeAllAssertingVHReferences(V); });
  // Also hoist the bitcast up to its operand if they are not in the same
  // block.
  if (auto *BCI = dyn_cast<Instruction>(BC1))
    if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
      if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
          !Op->isTerminator() && !Op->isEHPad())
bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
  // If the operands of I can be folded into a target instruction together with
  // I, duplicate and sink them.

  // OpsToSink can contain multiple uses in a use chain (e.g.
  // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
  // uses must come first, so we process the ops in reverse order so as to not
  // create invalid IR.

  unsigned long InstNumber = 0;
  for (const auto &I : *TargetBB)
    InstOrdering[&I] = InstNumber++;

    auto *UI = cast<Instruction>(U->get());
7658if (isa<PHINode>(UI))
7661if (InstOrdering[UI] < InstOrdering[InsertPoint])
7670for (
Use *U : ToReplace) {
7671auto *UI = cast<Instruction>(
U->get());
7675// Now we clone an instruction, its operands' defs may sink to this BB 7676// now. So we put the operands defs' BBs into FreshBBs to do optimization. 7678if (
auto *OpDef = dyn_cast<Instruction>(
Op))
7679 FreshBBs.
insert(OpDef->getParent());
7682 NewInstructions[UI] = NI;
7687 InsertedInsts.insert(NI);
7689// Update the use for the new instruction, making sure that we update the 7690// sunk instruction uses, if it is part of a chain that has already been 7693if (
auto It = NewInstructions.
find(OldI); It != NewInstructions.
end())
7694 It->second->setOperand(
U->getOperandNo(), NI);
7700// Remove instructions that are dead after sinking. 7701for (
auto *
I : MaybeDead) {
7702if (!
I->hasNUsesOrMore(1)) {
7704I->eraseFromParent();
bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())

  // If the register width is greater than the type width, expand the condition
  // of the switch instruction and each case constant to the width of the
  // register. By widening the type of the switch condition, subsequent
  // comparisons (for case comparisons) will not need to be extended to the
  // preferred register width, so we will potentially eliminate N-1 extends,
  // where N is the number of cases in the switch.

  // Extend the switch condition and case constants using the target preferred
  // extend unless the switch condition is a function argument with an extend
  // attribute. In that case, we can avoid an unnecessary mask/extension by
  // matching the argument extension instead.

  // Some targets prefer SExt over ZExt.
    ExtType = Instruction::SExt;
  if (auto *Arg = dyn_cast<Argument>(Cond)) {
    if (Arg->hasSExtAttr())
      ExtType = Instruction::SExt;
    if (Arg->hasZExtAttr())
      ExtType = Instruction::ZExt;

  SI->setCondition(ExtInst);
  for (auto Case : SI->cases()) {
    const APInt &NarrowConst = Case.getCaseValue()->getValue();
    APInt WideConst = (ExtType == Instruction::ZExt)
                          ? NarrowConst.zext(RegWidth)
                          : NarrowConst.sext(RegWidth);
    Case.setValue(ConstantInt::get(Context, WideConst));
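    // Illustrative IR (a sketch, assuming a 32-bit preferred register width):
    //
    //   switch i8 %x, label %def [ i8 1, label %bb1
    //                              i8 2, label %bb2 ]
    //
    // becomes
    //
    //   %x.ext = zext i8 %x to i32
    //   switch i32 %x.ext, label %def [ i32 1, label %bb1
    //                                   i32 2, label %bb2 ]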
bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
  // The SCCP optimization tends to produce code like this:
  //   switch(x) { case 42: phi(42, ...) }
  // Materializing the constant for the phi-argument needs instructions; so we
  // change the code to:
  //   switch(x) { case 42: phi(x, ...) }
  Value *Condition = SI->getCondition();

  // Avoid endless loop in degenerate case.
  if (isa<ConstantInt>(*Condition))

  // Set to true if we previously checked that `CaseBB` is only reached by
  // a single case from this switch.
  bool CheckedForSinglePred = false;

      // If ZExt is free then we can also catch patterns like this:
      //   switch((i32)x) { case 42: phi((i64)42, ...); }
      // and replace `(i64)42` with `zext i32 %x to i64`.
      if (PHIType == ConditionType || TryZExt) {
7793// Set to true to skip this case because of multiple preds. 7794bool SkipCase =
false;
7795Value *Replacement =
nullptr;
7796for (
unsignedI = 0, E =
PHI.getNumIncomingValues();
I != E;
I++) {
7797Value *PHIValue =
PHI.getIncomingValue(
I);
7798if (PHIValue != CaseValue) {
7801ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7807if (
PHI.getIncomingBlock(
I) != SwitchBB)
7809// We cannot optimize if there are multiple case labels jumping to 7810// this block. This check may get expensive when there are many 7811// case labels so we test for it last. 7812if (!CheckedForSinglePred) {
7813 CheckedForSinglePred =
true;
7814if (
SI->findCaseDest(CaseBB) ==
nullptr) {
7820if (Replacement ==
nullptr) {
7821if (PHIValue == CaseValue) {
7822 Replacement = Condition;
7825 Replacement = Builder.CreateZExt(Condition, PHIType);
7828PHI.setIncomingValue(
I, Replacement);
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
  bool Changed = optimizeSwitchType(SI);
  Changed |= optimizeSwitchPhiConstants(SI);

/// Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
/// E.g.,
///   a = vector_op <2 x i32>
///   b = extractelement <2 x i32> a, i32 0
/// =>
///   a = vector_op <2 x i32>
///   c = vector_op a (equivalent to scalar_op on the related lane)
///   * d = extractelement <2 x i32> c, i32 0
///
/// Assuming both extractelement and store can be combined, we get rid of the
/// transition.
class VectorPromoteHelper {
  /// DataLayout associated with the current module.

  /// Used to perform some checks on the legality of vector operations.

  /// Used to estimate the cost of the promoted chain.

  /// The transition being moved downwards.

  /// The sequence of instructions to be promoted.

  /// Cost of combining a store and an extract.
  unsigned StoreExtractCombineCost;

  /// Instruction that will be combined with the transition.

  /// The instruction that represents the current end of the transition.
  /// Since we are faking the promotion until we reach the end of the chain
  /// of computation, we need a way to get the current end of the transition.
    if (InstsToBePromoted.
empty())
7890return InstsToBePromoted.
back();
7893 /// Return the index of the original value in the transition. 7894 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, 7895 /// c, is at index 0. 7896unsigned getTransitionOriginalValueIdx()
const{
7897assert(isa<ExtractElementInst>(Transition) &&
7898"Other kind of transitions are not supported yet");
7902 /// Return the index of the index in the transition. 7903 /// E.g., for "extractelement <2 x i32> c, i32 0" the index 7905unsigned getTransitionIdx()
const{
7906assert(isa<ExtractElementInst>(Transition) &&
7907"Other kind of transitions are not supported yet");
7911 /// Get the type of the transition. 7912 /// This is the type of the original value. 7913 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the 7914 /// transition is <2 x i32>. 7915Type *getTransitionType()
const{
7919 /// Promote \p ToBePromoted by moving \p Def downward through. 7920 /// I.e., we have the following sequence: 7921 /// Def = Transition <ty1> a to <ty2> 7922 /// b = ToBePromoted <ty2> Def, ... 7924 /// b = ToBePromoted <ty1> a, ... 7925 /// Def = Transition <ty1> ToBePromoted to <ty2> 7928 /// Check whether or not it is profitable to promote all the 7929 /// instructions enqueued to be promoted. 7930bool isProfitableToPromote() {
7931Value *ValIdx = Transition->
getOperand(getTransitionOriginalValueIdx());
7932unsignedIndex = isa<ConstantInt>(ValIdx)
7933 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7935Type *PromotedType = getTransitionType();
7938unsigned AS =
ST->getPointerAddressSpace();
7939// Check if this store is supported. 7943// If this is not supported, there is no way we can combine 7944// the extract with the store. 7948// The scalar chain of computation has to pay for the transition 7950// The vector chain has to account for the combining cost. 7956for (
constauto &Inst : InstsToBePromoted) {
7958// By construction, all instructions being promoted are arithmetic ones. 7959// Moreover, one argument is a constant that can be viewed as a splat 7962bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7963 isa<ConstantFP>(Arg0);
7976dbgs() <<
"Estimated cost of computation to be promoted:\nScalar: " 7977 << ScalarCost <<
"\nVector: " << VectorCost <<
'\n');
7978return ScalarCost > VectorCost;
7981 /// Generate a constant vector with \p Val with the same 7982 /// number of elements as the transition. 7983 /// \p UseSplat defines whether or not \p Val should be replicated 7984 /// across the whole vector. 7985 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, 7986 /// otherwise we generate a vector with as many poison as possible: 7987 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only 7988 /// used at the index of the extract. 7990unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7992// If we cannot determine where the constant must be, we have to 7993// use a splat constant. 7995if (
ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8001ElementCountEC = cast<VectorType>(getTransitionType())->getElementCount();
8005if (!
EC.isScalable()) {
8008for (
unsignedIdx = 0;
Idx !=
EC.getKnownMinValue(); ++
Idx) {
8009if (
Idx == ExtractIdx)
8017"Generate scalable vector for non-splat is unimplemented");
8020 /// Check if promoting to a vector type an operand at \p OperandIdx 8021 /// in \p Use can trigger undefined behavior. 8023unsigned OperandIdx) {
8024// This is not safe to introduce undef when the operand is on 8025// the right hand side of a division-like instruction. 8028switch (
Use->getOpcode()) {
8031case Instruction::SDiv:
8032case Instruction::UDiv:
8033case Instruction::SRem:
8034case Instruction::URem:
8036case Instruction::FDiv:
8037case Instruction::FRem:
8038return !
Use->hasNoNaNs();
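  // For example (illustrative): if the transition feeds the divisor of a
  // "udiv i32 %a, %t", promoting would have to materialize the unused vector
  // lanes of the divisor; filling them with undef/poison could create a
  // divide-by-undef, so a splat of the constant is used instead (see the
  // UseSplat handling below).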
8046unsigned CombineCost)
8047 :
DL(
DL), TLI(TLI),
TTI(
TTI), Transition(Transition),
8048 StoreExtractCombineCost(CombineCost) {
8049assert(Transition &&
"Do not know how to promote null");
8052 /// Check if we can promote \p ToBePromoted to \p Type. 8053bool canPromote(
constInstruction *ToBePromoted)
const{
8054// We could support CastInst too. 8055return isa<BinaryOperator>(ToBePromoted);
8058 /// Check if it is profitable to promote \p ToBePromoted 8059 /// by moving downward the transition through. 8060bool shouldPromote(
constInstruction *ToBePromoted)
const{
8061// Promote only if all the operands can be statically expanded. 8062// Indeed, we do not want to introduce any new kind of transitions. 8065if (Val == getEndOfTransition()) {
8066// If the use is a division and the transition is on the rhs, 8067// we cannot promote the operation, otherwise we may create a 8069if (canCauseUndefinedBehavior(ToBePromoted,
U.getOperandNo()))
8073if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8074 !isa<ConstantFP>(Val))
8077// Check that the resulting operation is legal. 8083 ISDOpcode, TLI.
getValueType(DL, getTransitionType(),
true));
8086 /// Check whether or not \p Use can be combined 8087 /// with the transition. 8088 /// I.e., is it possible to do Use(Transition) => AnotherUse? 8091 /// Record \p ToBePromoted as part of the chain to be promoted. 8092void enqueueForPromotion(
Instruction *ToBePromoted) {
8093 InstsToBePromoted.push_back(ToBePromoted);
8096 /// Set the instruction that will be combined with the transition. 8097void recordCombineInstruction(
Instruction *ToBeCombined) {
8099 CombineInst = ToBeCombined;
8102 /// Promote all the instructions enqueued for promotion if it is 8104 /// \return True if the promotion happened, false otherwise. 8106// Check if there is something to promote. 8107// Right now, if we do not have anything to combine with, 8108// we assume the promotion is not profitable. 8109if (InstsToBePromoted.empty() || !CombineInst)
8117for (
auto &ToBePromoted : InstsToBePromoted)
8118 promoteImpl(ToBePromoted);
8119 InstsToBePromoted.clear();
8124}
// end anonymous namespace 8126void VectorPromoteHelper::promoteImpl(
Instruction *ToBePromoted) {
8127// At this point, we know that all the operands of ToBePromoted but Def 8128// can be statically promoted. 8129// For Def, we need to use its parameter in ToBePromoted: 8130// b = ToBePromoted ty1 a 8131// Def = Transition ty1 b to ty2 8132// Move the transition down. 8133// 1. Replace all uses of the promoted operation by the transition. 8134// = ... b => = ... Def. 8136"The type of the result of the transition does not match " 8139// 2. Update the type of the uses. 8140// b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. 8141Type *TransitionTy = getTransitionType();
8143// 3. Update all the operands of the promoted operation with promoted 8145// b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. 8148Value *NewVal =
nullptr;
8149if (Val == Transition)
8150 NewVal = Transition->
getOperand(getTransitionOriginalValueIdx());
8151elseif (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8152 isa<ConstantFP>(Val)) {
8153// Use a splat constant if it is not safe to use undef. 8155 cast<Constant>(Val),
8156 isa<UndefValue>(Val) ||
8157 canCauseUndefinedBehavior(ToBePromoted,
U.getOperandNo()));
8161 ToBePromoted->
setOperand(
U.getOperandNo(), NewVal);
8164 Transition->
setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8167/// Some targets can do store(extractelement) with one instruction. 8168/// Try to push the extractelement towards the stores when the target 8169/// has this feature and this is profitable. 8170bool CodeGenPrepare::optimizeExtractElementInst(
Instruction *Inst) {
8171unsigned CombineCost = std::numeric_limits<unsigned>::max();
8178// At this point we know that Inst is a vector to scalar transition. 8179// Try to move it down the def-use chain, until: 8180// - We can combine the transition with its single use 8181// => we got rid of the transition. 8182// - We escape the current basic block 8183// => we would need to check that we are moving it at a cheaper place and 8184// we do not do that for now. 8186LLVM_DEBUG(
dbgs() <<
"Found an interesting transition: " << *Inst <<
'\n');
8187 VectorPromoteHelper VPH(*
DL, *TLI, *
TTI, Inst, CombineCost);
8188// If the transition has more than one use, assume this is not going to be 8194if (ToBePromoted->
getParent() != Parent) {
8195LLVM_DEBUG(
dbgs() <<
"Instruction to promote is in a different block (" 8197 <<
") than the transition (" << Parent->
getName()
8202if (VPH.canCombine(ToBePromoted)) {
8204 <<
"will be combined with: " << *ToBePromoted <<
'\n');
8205 VPH.recordCombineInstruction(ToBePromoted);
8206bool Changed = VPH.promote();
8207 NumStoreExtractExposed += Changed;
8212if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8215LLVM_DEBUG(
dbgs() <<
"Promoting is possible... Enqueue for promotion!\n");
8217 VPH.enqueueForPromotion(ToBePromoted);
8218 Inst = ToBePromoted;
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
///   (store (or (zext (bitcast F to i32) to i64),
///              (shl (zext I to i64), 32)), addr)  -->
///   (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged stores can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
///   void goo(const std::pair<int, float> &);
///   ...
///   goo(std::make_pair(tmp, ftmp));
///
/// Although we already have similar splitting in DAG Combine, we duplicate
/// it in CodeGenPrepare to catch the case in which pattern is across
/// multiple BBs. The logic in DAG Combine is kept to catch case generated
/// during code expansion.

  // Handle simple but common cases only.
  Type *StoreType = SI.getValueOperand()->getType();

  // The code below assumes shifting a value by <number of bits>,
  // whereas scalable vectors would have to be shifted by
  // <2log(vscale) + number of bits> in order to store the
  // low/high parts. Bailing out for now.

  if (!DL.typeSizeEqualsStoreSize(StoreType) ||
      DL.getTypeSizeInBits(StoreType) == 0)

  unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;

  if (!DL.typeSizeEqualsStoreSize(SplitStoreType))

  // Don't split the store if it is volatile.

  // Match the following patterns:
  // (store (or (zext LValue to i64),
  //            (shl (zext HValue to i64), 32)), HalfValBitSize)
  //  or
  // (store (or (shl (zext HValue to i64), 32)),
  //            (zext LValue to i64)), HalfValBitSize)
  // Expect both operands of OR and the first operand of SHL have only
  // one use.
  if (!match(SI.getValueOperand(),

  // Check LValue and HValue are int with size less than or equal to 32.
  if (!LValue->getType()->isIntegerTy() ||
      DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
      DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)

  // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
  // as the input of target query.
  auto *LBC = dyn_cast<BitCastInst>(LValue);
8305auto *HBC = dyn_cast<BitCastInst>(HValue);
8313// Start to split store. 8317// If LValue/HValue is a bitcast in another BB, create a new one in current 8318// BB so it may be merged with the splitted stores by dag combiner. 8319if (LBC && LBC->getParent() != SI.getParent())
8321if (HBC && HBC->getParent() != SI.getParent())
8322 HValue = Builder.
CreateBitCast(HBC->getOperand(0), HBC->getType());
8324bool IsLE = SI.getDataLayout().isLittleEndian();
8325auto CreateSplitStore = [&](
Value *V,
boolUpper) {
8328Align Alignment = SI.getAlign();
8329constbool IsOffsetStore = (IsLE &&
Upper) || (!IsLE && !
Upper);
8332 SplitStoreType,
Addr,
8335// When splitting the store in half, naturally one half will retain the 8336// alignment of the original wider store, regardless of whether it was 8337// over-aligned or not, while the other will require adjustment. 8343 CreateSplitStore(
LValue,
false);
8344 CreateSplitStore(HValue,
true);
8346// Delete the old store. 8347 SI.eraseFromParent();
8351// Return true if the GEP has two operands, the first operand is of a sequential 8352// type, and the second operand is a constant. 8355returnGEP->getNumOperands() == 2 &&
I.isSequential() &&
8356 isa<ConstantInt>(
GEP->getOperand(1));
8359// Try unmerging GEPs to reduce liveness interference (register pressure) across 8360// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, 8361// reducing liveness interference across those edges benefits global register 8362// allocation. Currently handles only certain cases. 8364// For example, unmerge %GEPI and %UGEPI as below. 8366// ---------- BEFORE ---------- 8371// %GEPI = gep %GEPIOp, Idx 8373// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] 8374// (* %GEPI is alive on the indirectbr edges due to other uses ahead) 8375// (* %GEPIOp is alive on the indirectbr edges only because of it's used by 8378// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged) 8379// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged) 8384// %UGEPI = gep %GEPIOp, UIdx 8386// --------------------------- 8388// ---------- AFTER ---------- 8390// ... (same as above) 8391// (* %GEPI is still alive on the indirectbr edges) 8392// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the 8398// %UGEPI = gep %GEPI, (UIdx-Idx) 8400// --------------------------- 8402// The register pressure on the IndirectBr edges is reduced because %GEPIOp is 8403// no longer alive on them. 8405// We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging 8406// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as 8407// not to disable further simplications and optimizations as a result of GEP 8410// Note this unmerging may increase the length of the data flow critical path 8411// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff 8412// between the register pressure and the length of data-flow critical 8413// path. Restricting this to the uncommon IndirectBr case would minimize the 8414// impact of potentially longer critical path, if any, and the impact on compile 8419// Check that SrcBlock ends with an IndirectBr. If not, give up. The common 8420// (non-IndirectBr) cases exit early here. 8423// Check that GEPI is a simple gep with a single constant index. 8427// Check that GEPI is a cheap one. 8433// Check that GEPIOp is an instruction that's also defined in SrcBlock. 8434if (!isa<Instruction>(GEPIOp))
8436auto *GEPIOpI = cast<Instruction>(GEPIOp);
8437if (GEPIOpI->getParent() != SrcBlock)
8439// Check that GEP is used outside the block, meaning it's alive on the 8440// IndirectBr edge(s). 8442 if (auto *I = dyn_cast<Instruction>(Usr)) {
8443 if (I->getParent() != SrcBlock) {
8450// The second elements of the GEP chains to be unmerged. 8451 std::vector<GetElementPtrInst *> UGEPIs;
8452// Check each user of GEPIOp to check if unmerging would make GEPIOp not alive 8453// on IndirectBr edges. 8457// Check if Usr is an Instruction. If not, give up. 8458if (!isa<Instruction>(Usr))
8460auto *UI = cast<Instruction>(Usr);
8461// Check if Usr in the same block as GEPIOp, which is fine, skip. 8464// Check if Usr is a GEP. If not, give up. 8465if (!isa<GetElementPtrInst>(Usr))
8467auto *UGEPI = cast<GetElementPtrInst>(Usr);
8468// Check if UGEPI is a simple gep with a single constant index and GEPIOp is 8469// the pointer operand to it. If so, record it in the vector. If not, give 8473if (UGEPI->getOperand(0) != GEPIOp)
8475if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8477if (GEPIIdx->getType() !=
8478 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8480ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8485 UGEPIs.push_back(UGEPI);
8487if (UGEPIs.size() == 0)
8489// Check the materializing cost of (Uidx-Idx). 8491ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8498// Now unmerge between GEPI and UGEPIs. 8500 UGEPI->setOperand(0, GEPI);
8501ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8502Constant *NewUGEPIIdx = ConstantInt::get(
8503 GEPIIdx->getType(), UGEPIIdx->
getValue() - GEPIIdx->getValue());
8504 UGEPI->setOperand(1, NewUGEPIIdx);
8505// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not 8506// inbounds to avoid UB. 8507if (!GEPI->isInBounds()) {
8508 UGEPI->setIsInBounds(
false);
8511// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not 8512// alive on IndirectBr edges). 8515 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8517"GEPIOp is used outside SrcBlock");
//   %c = icmp ult %x, 8
//   ...
//   %c = icmp eq %tc, 0
//
// Creating the cmp to zero can be better for the backend, especially if the
// lshr produces flags that can be used automatically.

  ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
  if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())

  Value *X = Cmp->getOperand(0);
  APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();

  for (auto *U : X->users()) {
    // A quick dominance check
        (UI->getParent() != Branch->getParent() &&
         UI->getParent() != Branch->getSuccessor(0) &&
         UI->getParent() != Branch->getSuccessor(1)) ||
        (UI->getParent() != Branch->getParent() &&
         !UI->getParent()->getSinglePredecessor()))

    if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
      if (UI->getParent() != Branch->getParent())
          ConstantInt::get(UI->getType(), 0));

    if (Cmp->isEquality() &&
      if (UI->getParent() != Branch->getParent())
          ConstantInt::get(UI->getType(), 0));
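// Illustrative IR for the power-of-2 case above (a sketch; names are made up):
//
//   %c = icmp ult i32 %x, 8
//   br i1 %c, label %t, label %f
//   ...
//   %s = lshr i32 %x, 3
//
// can instead branch on the shifted value:
//
//   %s = lshr i32 %x, 3
//   %c = icmp eq i32 %s, 0
//   br i1 %c, label %t, label %f
//
// since %x u< 8 holds exactly when %x >> 3 == 0.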
8586bool CodeGenPrepare::optimizeInst(
Instruction *
I, ModifyDT &ModifiedDT) {
8587bool AnyChange =
false;
8588 AnyChange = fixupDbgVariableRecordsOnInst(*
I);
8590// Bail out if we inserted the instruction to prevent optimizations from 8591// stepping on each other's toes. 8592if (InsertedInsts.count(
I))
8595// TODO: Move into the switch on opcode below here. 8597// It is possible for very late stage optimizations (such as SimplifyCFG) 8598// to introduce PHI nodes too late to be cleaned up. If we detect such a 8599// trivial PHI, go ahead and zap it here. 8601 LargeOffsetGEPMap.erase(
P);
8603P->eraseFromParent();
8610if (
CastInst *CI = dyn_cast<CastInst>(
I)) {
8611// If the source of the cast is a constant, then this should have 8612// already been constant folded. The only reason NOT to constant fold 8613// it is if something (e.g. LSR) was careful to place the constant 8614// evaluation in a block other than then one that uses it (e.g. to hoist 8615// the address of globals out of a loop). If this is the case, we don't 8616// want to forward-subst the cast. 8623if ((isa<UIToFPInst>(
I) || isa<SIToFPInst>(
I) || isa<FPToUIInst>(
I) ||
8624 isa<TruncInst>(
I)) &&
8626I, LI->getLoopFor(
I->getParent()), *
TTI))
8629if (isa<ZExtInst>(
I) || isa<SExtInst>(
I)) {
8630 /// Sink a zext or sext into its user blocks if the target type doesn't 8631 /// fit in one register 8634 TargetLowering::TypeExpandInteger) {
8638I, LI->getLoopFor(
I->getParent()), *
TTI))
8641bool MadeChange = optimizeExt(
I);
8642return MadeChange | optimizeExtUses(
I);
8648if (
auto *Cmp = dyn_cast<CmpInst>(
I))
8649if (optimizeCmp(Cmp, ModifiedDT))
8656if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
8657 LI->
setMetadata(LLVMContext::MD_invariant_group,
nullptr);
8667SI->setMetadata(LLVMContext::MD_invariant_group,
nullptr);
8668unsigned AS =
SI->getPointerAddressSpace();
8669return optimizeMemoryInst(
I,
SI->getOperand(1),
8670SI->getOperand(0)->getType(), AS);
8674unsigned AS = RMW->getPointerAddressSpace();
8675return optimizeMemoryInst(
I, RMW->getPointerOperand(), RMW->getType(), AS);
8679unsigned AS = CmpX->getPointerAddressSpace();
8680return optimizeMemoryInst(
I, CmpX->getPointerOperand(),
8681 CmpX->getCompareOperand()->getType(), AS);
8690// TODO: Move this into the switch on opcode - it handles shifts already. 8691if (BinOp && (BinOp->
getOpcode() == Instruction::AShr ||
8692 BinOp->
getOpcode() == Instruction::LShr)) {
8700if (GEPI->hasAllZeroIndices()) {
8701 /// The GEP operand must be a pointer, so must its result -> BitCast 8703 GEPI->getName(), GEPI->getIterator());
8704NC->setDebugLoc(GEPI->getDebugLoc());
8707 GEPI, TLInfo,
nullptr,
8708 [&](
Value *V) { removeAllAssertingVHReferences(V); });
8710 optimizeInst(
NC, ModifiedDT);
8719// freeze(icmp a, const)) -> icmp (freeze a), const 8720// This helps generate efficient conditional jumps. 8722if (
ICmpInst *
II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8724elseif (
FCmpInst *
F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8725 CmpI =
F->getFastMathFlags().none() ?
F :
nullptr;
8729bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8730 isa<ConstantPointerNull>(Op0);
8731bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8732 isa<ConstantPointerNull>(Op1);
8733if (Const0 || Const1) {
8734if (!Const0 || !Const1) {
8740 FI->eraseFromParent();
  if (tryToSinkFreeOperands(I))

  switch (I->getOpcode()) {
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return optimizeShiftInst(cast<BinaryOperator>(I));
  case Instruction::Call:
  case Instruction::Select:
    return optimizeSelectInst(cast<SelectInst>(I));
  case Instruction::ShuffleVector:
    return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
  case Instruction::Switch:
    return optimizeSwitchInst(cast<SwitchInst>(I));
  case Instruction::ExtractElement:
    return optimizeExtractElementInst(cast<ExtractElementInst>(I));
  case Instruction::Br:
8772/// Given an OR instruction, check to see if this is a bitreverse 8773/// idiom. If so, insert the new intrinsic and return true. 8775if (!
I.getType()->isIntegerTy() ||
8787 [&](
Value *V) { removeAllAssertingVHReferences(V); });
8791// In this pass we look for GEP and cast instructions that are used 8792// across basic blocks and rewrite them to improve basic-block-at-a-time 8794bool CodeGenPrepare::optimizeBlock(
BasicBlock &BB, ModifyDT &ModifiedDT) {
8796bool MadeChange =
false;
8799 CurInstIterator = BB.
begin();
8800 ModifiedDT = ModifyDT::NotModifyDT;
8801while (CurInstIterator != BB.
end()) {
8802 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8803if (ModifiedDT != ModifyDT::NotModifyDT) {
8804// For huge function we tend to quickly go though the inner optmization 8805// opportunities in the BB. So we go back to the BB head to re-optimize 8806// each instruction instead of go back to the function head. 8816 }
while (ModifiedDT == ModifyDT::ModifyInstDT);
8818bool MadeBitReverse =
true;
8819while (MadeBitReverse) {
8820 MadeBitReverse =
false;
8822if (makeBitReverse(
I)) {
8823 MadeBitReverse = MadeChange =
true;
8828 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8833// Some CGP optimizations may move or alter what's computed in a block. Check 8834// whether a dbg.value intrinsic could be pointed at a more appropriate operand. 8839// Does this dbg.value refer to a sunk address calculation? 8840bool AnyChange =
false;
8843for (
Value *Location : LocationOps) {
8847// Point dbg.value at locally computed address, which should give the best 8848// opportunity to be accurately lowered. This update may change the type 8849// of pointer being referred to; however this makes no difference to 8850// debugging information, and we can't generate bitcasts that may affect 8859bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(
Instruction &
I) {
8860bool AnyChange =
false;
8862 AnyChange |= fixupDbgVariableRecord(DVR);
8866// FIXME: should updating debug-info really cause the "changed" flag to fire, 8867// which can cause a function to be reprocessed? 8869if (DVR.
Type != DbgVariableRecord::LocationType::Value &&
8870 DVR.
Type != DbgVariableRecord::LocationType::Assign)
8873// Does this DbgVariableRecord refer to a sunk address calculation? 8874bool AnyChange =
false;
8877for (
Value *Location : LocationOps) {
8881// Point dbg.value at locally computed address, which should give the best 8882// opportunity to be accurately lowered. This update may change the type 8883// of pointer being referred to; however this makes no difference to 8884// debugging information, and we can't generate bitcasts that may affect 8895if (isa<PHINode>(VI))
8896 DVI->
insertBefore(VI->getParent()->getFirstInsertionPt());
8904if (isa<PHINode>(VI))
8910// A llvm.dbg.value may be using a value before its definition, due to 8911// optimizations in this pass and others. Scan for such dbg.values, and rescue 8912// them by moving the dbg.value to immediately after the value definition. 8913// FIXME: Ideally this should never be necessary, and this has the potential 8914// to re-order dbg.value intrinsics. 8915bool CodeGenPrepare::placeDbgValues(
Function &
F) {
8916bool MadeChange =
false;
8919auto DbgProcessor = [&](
auto *DbgItem,
Instruction *Position) {
8921for (
Value *V : DbgItem->location_ops())
8922if (
Instruction *VI = dyn_cast_or_null<Instruction>(V))
8925// This item may depend on multiple instructions, complicating any 8926// potential sink. This block takes the defensive approach, opting to 8927// "undef" the item if it has more than one instruction and any of them do 8930if (
VI->isTerminator())
8933// If VI is a phi in a block with an EHPad terminator, we can't insert 8935if (isa<PHINode>(VI) &&
VI->getParent()->getTerminator()->isEHPad())
8938// If the defining instruction dominates the dbg.value, we do not need 8939// to move the dbg.value. 8943// If we depend on multiple instructions and any of them doesn't 8944// dominate this DVI, we probably can't salvage it: moving it to 8945// after any of the instructions could cause us to lose the others. 8946if (VIs.size() > 1) {
8949 <<
"Unable to find valid location for Debug Value, undefing:\n" 8951 DbgItem->setKillLocation();
8956 << *DbgItem <<
' ' << *VI);
8965// Process dbg.value intrinsics. 8968 DbgProcessor(DVI, DVI);
8972// If this isn't a dbg.value, process any attached DbgVariableRecord 8973// records attached to this instruction. 8976if (DVR.
Type != DbgVariableRecord::LocationType::Value)
8978 DbgProcessor(&DVR, &
Insn);
// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
// probes can be chained dependencies of other regular DAG nodes and block DAG
// combine optimizations.
bool CodeGenPrepare::placePseudoProbes(Function &F) {
  bool MadeChange = false;

    // Move the rest of the probes to the beginning of the block.
    auto FirstInst = Block.getFirstInsertionPt();
    while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())

      if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
        II->moveBefore(FirstInst);
/// Scale down both weights to fit into uint32_t.
  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
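  // For example (illustrative numbers): NewTrue = 2^33 and NewFalse = 2^31
  // give Scale = (2^33 / (2^32 - 1)) + 1 = 3, so the scaled weights become
  // 2^33 / 3 and 2^31 / 3, both of which fit in uint32_t while keeping roughly
  // the original 4:1 ratio.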
/// Some targets prefer to split a conditional branch like:
///
///   %0 = icmp ne i32 %a, 0
///   %1 = icmp ne i32 %b, 0
///   %or.cond = or i1 %0, %1
///   br i1 %or.cond, label %TrueBB, label %FalseBB
///
/// into multiple branch instructions like:
///
///   %0 = icmp ne i32 %a, 0
///   br i1 %0, label %TrueBB, label %bb2
///
///   %1 = icmp ne i32 %b, 0
///   br i1 %1, label %TrueBB, label %FalseBB
///
/// This usually allows instruction selection to do even further optimizations
/// and combine the compare with the branch instruction. Currently this is
/// applied for targets which have "cheap" jump instructions.
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9042bool MadeChange =
false;
9044// Does this BB end with the following? 9045// %cond1 = icmp|fcmp|binary instruction ... 9046// %cond2 = icmp|fcmp|binary instruction ... 9047// %cond.or = or|and i1 %cond1, cond2 9048// br i1 %cond.or label %dest1, label %dest2" 9056if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9059// The merging of mostly empty BB can cause a degenerate branch. 9064Value *Cond1, *Cond2;
9067 Opc = Instruction::And;
9070 Opc = Instruction::Or;
9080if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9092// Update original basic block by using the first condition directly by the 9093// branch instruction and removing the no longer needed and/or instruction. 9094 Br1->setCondition(Cond1);
9097// Depending on the condition we have to either replace the true or the 9098// false successor of the original branch instruction. 9099if (Opc == Instruction::And)
9100 Br1->setSuccessor(0, TmpBB);
9102 Br1->setSuccessor(1, TmpBB);
9104// Fill in the new basic block. 9106if (
auto *
I = dyn_cast<Instruction>(Cond2)) {
9107I->removeFromParent();
9108I->insertBefore(Br2->getIterator());
9111// Update PHI nodes in both successors. The original BB needs to be 9112// replaced in one successor's PHI nodes, because the branch comes now from 9113// the newly generated BB (NewBB). In the other successor we need to add one 9114// incoming edge to the PHI nodes, because both branch instructions target 9115// now the same successor. Depending on the original branch condition 9116// (and/or) we have to swap the successors (TrueDest, FalseDest), so that 9117// we perform the correct update for the PHI nodes. 9118// This doesn't change the successor order of the just created branch 9119// instruction (or any other instruction). 9120if (Opc == Instruction::Or)
9123// Replace the old BB with the new BB. 9126// Add another incoming edge from the new BB. 9132// Update the branch weights (from SelectionDAGBuilder:: 9133// FindMergedConditions). 9134if (Opc == Instruction::Or) {
      // We have flexibility in setting Prob for BB1 and Prob for NewBB.
      // The requirement is that
      //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
      //     = TrueProb for original BB.
      // Assuming the original weights are A and B, one choice is to set BB1's
      // weights to A and A+2B, and set TmpBB's weights to A and 2B. This
      // choice assumes that
      //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
      // Another choice is to assume TrueProb for BB1 equals to TrueProb for
      // TmpBB, but the math is more complicated.
      uint64_t NewTrueWeight = TrueWeight;
      uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
      Br1->setMetadata(LLVMContext::MD_prof,

      NewTrueWeight = TrueWeight;
      NewFalseWeight = 2 * FalseWeight;
      Br2->setMetadata(LLVMContext::MD_prof,
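      // Worked check of the choice above (illustrative): with original weights
      // A (true) and B (false), BB1 gets {A, A+2B} and TmpBB gets {A, 2B}, so
      //   TrueProb(BB1)                    = A / (2A + 2B)
      //   FalseProb(BB1) * TrueProb(TmpBB) = (A+2B)/(2A+2B) * A/(A+2B)
      //                                    = A / (2A + 2B)
      // and their sum is A / (A + B), the TrueProb of the original block.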
      // This requires creation of TmpBB after CurBB.

      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
      // The requirement is that
      //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
      //     = FalseProb for original BB.
      // Assuming the original weights are A and B, one choice is to set BB1's
      // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This
      // choice assumes that
      //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
      uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
      uint64_t NewFalseWeight = FalseWeight;
      Br1->setMetadata(LLVMContext::MD_prof,

      NewTrueWeight = 2 * TrueWeight;
      NewFalseWeight = FalseWeight;
      Br2->setMetadata(LLVMContext::MD_prof,

  ModifiedDT = ModifyDT::ModifyBBDT;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, const TargetTransformInfo &TTI, const DataLayout &DL)
Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1.
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified value is defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
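A hedged source-level analogue of that despeculation, assuming the target reports the count as expensive; slowCttz is a hypothetical stand-in for a cttz intrinsic whose zero input is defined.

    #include <cstdint>

    unsigned slowCttz(uint32_t X); // hypothetical expensive count-trailing-zeros

    // Branch around the expensive count instead of letting it be speculated:
    // the zero input takes the cheap constant path.
    unsigned despeculatedCttz(uint32_t X) {
      if (X == 0)
        return 32;
      return slowCttz(X);
    }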
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
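A source-level illustration (not the matcher itself) of the kind of constant edge case involved: adding 1 overflows exactly when the operand is already all-ones. Both helper names are hypothetical.

    #include <cstdint>

    // Edge-case form: adding 1 overflows iff the operand is already all-ones.
    bool addOneOverflows(uint32_t A)        { return A == UINT32_MAX; }
    // Generic wraparound form of the same check.
    bool addOneOverflowsGeneric(uint32_t A) { return A + 1u < A; }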
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
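A hedged source-level sketch of the idea this helper's name suggests, assuming the remainder operand is a unit-stride induction variable; use() is a hypothetical consumer.

    void use(unsigned V); // hypothetical consumer of the remainder

    // Replace "I % N" on a unit-stride induction variable with a counter that
    // wraps, avoiding a urem on every iteration.
    void remOfLoopIncrement(unsigned N, unsigned Trip) {
      for (unsigned I = 0, Rem = 0; I < Trip; ++I) {
        use(Rem);          // stands in for the original I % N
        if (++Rem == N)
          Rem = 0;
      }
    }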
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bit signed integer value.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
unsigned logBase2() const
APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
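A small sketch exercising the APInt queries listed above; the helper names are illustrative only, not part of the pass.

    #include "llvm/ADT/APInt.h"

    using namespace llvm;

    // Does V fit in a 32-bit signed integer? Equivalent to asking whether
    // getSignificantBits() is at most 32.
    static bool fitsInSigned32(const APInt &V) { return V.isSignedIntN(32); }

    // Widen V by 8 bits, preserving its signed value.
    static APInt widenSigned8(const APInt &V) { return V.sext(V.getBitWidth() + 8); }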
an instruction to allocate memory on the stack
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Value handle that asserts if the Value is deleted.
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
A constant value that is initialized with an expression using other constant values.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Value * createIsFPClass(Value *FPNum, unsigned Test)
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
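A short sketch of the IRBuilder calls listed above; emitIsZeroSelect is a hypothetical helper, not something defined by CodeGenPrepare.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // Append "icmp eq X, 0" and a select between TrueV/FalseV to the end of BB.
    static Value *emitIsZeroSelect(BasicBlock *BB, Value *X, Value *TrueV,
                                   Value *FalseV) {
      IRBuilder<> Builder(BB); // insert point: the end of BB
      Value *IsZero =
          Builder.CreateICmpEQ(X, Constant::getNullValue(X->getType()));
      return Builder.CreateSelect(IsZero, TrueV, FalseV, "sel.iszero");
    }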
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
const Function * getFunction() const
Return the function this instruction belongs to.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
iterator find(const KeyT &Key)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
PointerIntPair - This class implements a pair of a pointer and small integer.
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return true if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if the target has indicated that at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns the preferred type for a switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
@ OK_UniformConstantValue
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVMContext & getContext() const
All values hold a context through their type.
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
user_iterator_impl< User > user_iterator
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
void dump() const
Support for debugging, callable in GDB: V->dump()
Value handle that is nullable, but tries to track the Value.
bool pointsToAliveValue() const
This class represents zero extension of integer types.
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
constexpr bool isNonZero() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
StructType * getStructTypeOrNull() const
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match only the specified value V.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
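A brief sketch of the matchers listed above; matchCmpWithConstant is illustrative only, and follows the m_ICmp/m_APInt signatures shown here.

    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Recognise "icmp <pred> X, C" where C is a constant integer (or splat),
    // binding X, the predicate, and the APInt value.
    static bool matchCmpWithConstant(Value *V, Value *&X, CmpPredicate &Pred,
                                     const APInt *&C) {
      return match(V, m_ICmp(Pred, m_Value(X), m_APInt(C)));
    }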
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
NodeAddr< PhiNode * > Phi
const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
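A hedged usage sketch of the utility above: replace an instruction, then let the helper delete it and any operands that became trivially dead. replaceAndClean is a hypothetical name.

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    // Replace all uses of I with NewV, then delete I (and any now-dead operand
    // chain) if nothing else keeps it alive.
    static void replaceAndClean(Instruction *I, Value *NewV) {
      I->replaceAllUsesWith(NewV);
      RecursivelyDeleteTriviallyDeadInstructions(I);
    }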
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
auto pred_end(const MachineBasicBlock *BB)
APInt operator*(APInt a, uint64_t RHS)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
void sort(IteratorTy Start, IteratorTy End)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
bool VerifyLoopInfo
Enable verification of loop info.
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
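A source-level analogue of the bypass, under the assumption that the target reports 64-bit division as slow and 32-bit division as fast; bypassedUDiv is an illustrative name.

    #include <cstdint>

    // If both operands fit in 32 bits, take the cheaper narrow divide;
    // otherwise fall back to the full-width operation.
    uint64_t bypassedUDiv(uint64_t A, uint64_t B) {
      if ((A >> 32) == 0 && (B >> 32) == 0)
        return uint64_t(uint32_t(A) / uint32_t(B));
      return A / B;
    }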
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool pred_empty(const BasicBlock *BB)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
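A brief usage sketch of SplitBlockAndInsertIfThen as declared above; guardWithCondition is a hypothetical helper.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    // Split the block before I and branch on Cond; the returned terminator ends
    // the new "then" block, ready to receive guarded code.
    static void guardWithCondition(Instruction *I, Value *Cond) {
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(
          Cond, I->getIterator(), /*Unreachable=*/false);
      IRBuilder<> Builder(ThenTerm); // insert guarded instructions here
      (void)Builder;                 // sketch only
    }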
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.