//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;
50"Number of aggregate reconstructions turned into reuse of the " 53/// Return true if the value is cheaper to scalarize than it is to leave as a 54/// vector operation. If the extract index \p EI is a constant integer then 55/// some operations may be cheap to scalarize. 57/// FIXME: It's possible to create more instructions than previously existed. 61// If we can pick a scalar constant value out of a vector, that is free. 62if (
auto *
C = dyn_cast<Constant>(V))
63return CEI ||
C->getSplatValue();
65if (CEI &&
match(V, m_Intrinsic<Intrinsic::stepvector>())) {
66ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // Index needs to be lower than the minimum size of the vector, because
    // for scalable vector, the vector size is known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  // ...

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<ExtractElementInst *, 8> Extracts;
  Instruction *PHIUser = nullptr;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;
  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      // ...
    } else {
      // Scalarize PHI input:
      Instruction *newEI =
          ExtractElementInst::Create(PHIInVal, EI.getIndexOperand(), "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      if (pos && !isa<PHINode>(pos)) {
        // ...
      }
      // ...
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();
  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if ((!ShiftAmountC ||
         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
        Ext.getVectorOperand()->hasOneUse()) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }
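  // Worked example (illustrative): with the little-endian fold above, a
  // non-zero index becomes a right-shift before the truncate:
  //   extelt (bitcast i32 %x to <4 x i8>), 2 --> trunc (lshr i32 %x, 16) to i8
  // because ShiftAmountC = ExtIndexC * DestWidth = 2 * 8 = 16.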
  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar, *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be removed to SimplifyDemandedVectorElts,
      // once scale vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |            |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;
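    // Worked example (illustrative): with NarrowingRatio = 2 and ExtIndexC = 3
    // (the diagram above), Chunk = 3 % 2 = 1 on little-endian; on big-endian
    // the chunk order is reversed, so Chunk = 2 - 1 - 1 = 0.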
    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}
/// Find elements of V demanded by UserInstr.
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}
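// Illustrative example: if %v is only used as operand 0 of
//   shufflevector <4 x i32> %v, <4 x i32> %w, <2 x i32> <i32 1, i32 6>
// then only element 1 of %v is demanded (mask value 6 selects element 2 of
// %w), so the returned UsedElts has just bit 1 set.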
/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for a extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (auto *SI = dyn_cast<SelectInst>(SrcVec))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isGuaranteedNotToBePoison(SI->getCondition()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;
  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // Index needs to be lower than the minimum size of the vector, because
      // for scalable vector, the vector size is known at run time.
      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For fixed-length vector, it's invalid to extract out-of-range element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;
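    // Illustrative example of the stepvector fold above:
    //   %s = call <4 x i32> @llvm.stepvector.v4i32()
    //   extractelement <4 x i32> %s, i64 2 --> i32 2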
    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO come up with a n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  if (auto *CI = dyn_cast<CmpInst>(SrcVec))
    if (cheapToScalarize(SrcVec, Index)) {
      // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
      Value *E0 = Builder.CreateExtractElement(CI->getOperand(0), Index);
      Value *E1 = Builder.CreateExtractElement(CI->getOperand(1), Index);
      CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
      return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(),
                                            SrcCmpInst->getPredicate(), E0, E1,
                                            SrcCmpInst);
    }
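  // Illustrative example of the binop scalarization above:
  //   extractelement (add <4 x i32> %x, %y), i64 0
  //   --> add (extractelement <4 x i32> %x, i64 0),
  //           (extractelement <4 x i32> %y, i64 0)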
  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // instsimplify already handled the case where the indices are constants
      // and equal by value, if both are constants, they must not be the same
      // value, extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelements.
        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          return GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vector.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }
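  // Illustrative example of the cast canonicalization above:
  //   extractelement (sext <4 x i16> %x to <4 x i32>), i64 1
  //   --> sext (extractelement <4 x i16> %x, i64 1) to i32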
  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there's two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable type, the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}
/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI =
                   dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}
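// Illustrative example: with <4 x i32> operands, inserting
// (extractelement RHS, 1) into element 2 of LHS produces the combined mask
// <0, 1, 5, 3>; RHS elements are numbered after the 4 LHS elements, so
// element 1 of RHS is mask value 4 + 1 = 5.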
/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;
  // Create a shuffle mask to widen the extended-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst->getIterator());
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
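  // Illustrative example: widening a <2 x i32> source to match a <4 x i32>
  // destination uses the mask <0, 1, -1, -1>, i.e. the original two elements
  // followed by two poison elements.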
  // Replace extracts from the original narrow vector with extracts from the new
  // wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with RHS
            // further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }
          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}
/// Look for chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted, see if they are all were extractvalue'd from
/// the same source aggregate from the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;
  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return all_of(AggElts, [](std::optional<Instruction *> Elt) {
      return Elt != NotFound;
    });
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;
  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed full initializer or can't visit any more `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.
  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was didn't
    /// match the aggregate type into which the insertion was,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
  bool EltDefinedInUseBB = false;
  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB) {
      if (auto *PHI = dyn_cast<PHINode>(Elt))
        Elt = dyn_cast<Instruction>(PHI->getIncomingValueForBlock(*PredBB));
      else
        EltDefinedInUseBB = true;
    }
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };
  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find appropriate source aggregate for each of the elements,
  // and see it's the same aggregate for each element. If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      }
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };
  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }
  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for aggregate, where we'll insert the final PHI that will be
  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
  // We should look in which blocks each of the AggElts is being defined,
  // they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate,
  // from which all the elements were originally extracted from?
  // Note that we want for the map to have stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) == AggregateDescription::Found) {
      FoundSrcAgg = true;
      IV.first->second = *SourceAggregate;
    } else {
      // If UseBB is the single successor of Pred, we can add InsertValue to
      // Pred.
      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!BI || !BI->isUnconditional())
        return nullptr;
    }
  }
  if (!FoundSrcAgg)
    return nullptr;

  // Do some sanity check if we need to add insertvalue into predecessors.
  auto OrigBB = OrigIVI.getParent();
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    // Element is defined in UseBB, so it can't be used in predecessors.
    if (EltDefinedInUseBB)
      return nullptr;

    // Do this transformation cross loop boundary may cause dead loop. So we
    // should avoid this situation. But LoopInfo is not generally available, we
    // must be conservative here.
    // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
    // can't be in inner loop.
    if (UseBB != OrigBB)
      return nullptr;

    // Avoid constructing constant aggregate because constant value may expose
    // more optimizations.
    bool ConstAgg = true;
    for (auto Val : AggElts) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
      if (!isa<Constant>(Elt)) {
        ConstAgg = false;
        break;
      }
    }
    if (ConstAgg)
      return nullptr;
  }

  // For predecessors without appropriate source aggregate, create one in the
  // predecessor.
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    BasicBlock *Pred = It.first;
    Builder.SetInsertPoint(Pred->getTerminator());
    Value *V = PoisonValue::get(AggTy);
    for (auto [Idx, Val] : enumerate(AggElts)) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
      V = Builder.CreateInsertValue(V, Elt, Idx);
    }

    It.second = V;
  }
  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}
/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices, as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getAggregateOperand());

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Can not analyze scalable type, the number of elements is not a compile-time
  // constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}
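// Illustrative example: for <4 x i32> operands, the mask <0, 5, 2, 7> is
// select-equivalent (lane i takes either LHS element i or RHS element i + 4),
// but <1, 5, 2, 7> is not, because lane 0 crosses into LHS element 1.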
/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Can not handle scalable type, the number of elements is not a compile-time
  // constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }
  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}
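// Illustrative example of the fold above, for a <4 x i32> splat of %k:
//   insertelt(insertelt(insertelt(insertelt poison, %k, 0), %k, 1), %k, 2), %k, 3
//   --> shufflevector (insertelt poison, %k, 0), poison, zeroinitializer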
/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if shuffle is scalable type. The number of elements in
  // shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}
/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if shuffle is scalable type. The number of elements in
  // shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}
/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
  // Bail out if the parent has more than one use. In that case, we'd be
  // replacing the insertelt with a shuffle, and that's not a clear win.
  if (!Inst || !Inst->hasOneUse())
    return nullptr;
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
    // The shuffle must have a constant vector operand. The insertelt must have
    // a constant scalar being inserted at a constant position in the vector.
    Constant *ShufConstVec, *InsEltScalar;
    uint64_t InsEltIndex;
    if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
        !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
        !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
      return nullptr;

    // Adding an element to an arbitrary shuffle could be expensive, but a
    // shuffle that selects elements from vectors without crossing lanes is
    // assumed cheap.
    // If we're just adding a constant into that shuffle, it will still be
    // cheap.
    if (!isShuffleEquivalentToSelect(*Shuf))
      return nullptr;

    // From the above 'select' check, we know that the mask has the same number
    // of elements as the vector input operands. We also know that each constant
    // input element is used in its lane and can not be used more than once by
    // the shuffle. Therefore, replace the constant in the shuffle's constant
    // vector with the insertelt constant. Replace the constant in the shuffle's
    // mask vector with the insertelt index plus the length of the vector
    // (because the constant vector operand of a shuffle is always the 2nd
    // operand).
    ArrayRef<int> Mask = Shuf->getShuffleMask();
    unsigned NumElts = Mask.size();
    SmallVector<Constant *, 16> NewShufElts(NumElts);
    SmallVector<int, 16> NewMaskElts(NumElts);
    for (unsigned I = 0; I != NumElts; ++I) {
      if (I == InsEltIndex) {
        NewShufElts[I] = InsEltScalar;
        NewMaskElts[I] = InsEltIndex + NumElts;
      } else {
        // Copy over the existing values.
        NewShufElts[I] = ShufConstVec->getAggregateElement(I);
        NewMaskElts[I] = Mask[I];
      }

      // Bail if we failed to find an element.
      if (!NewShufElts[I])
        return nullptr;
    }

    // Create new operands for a shuffle that includes the constant of the
    // original insertelt. The old shuffle will be dead now.
    return new ShuffleVectorInst(Shuf->getOperand(0),
                                 ConstantVector::get(NewShufElts), NewMaskElts);
  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
    // Transform sequences of insertelements ops with constant data/indexes into
    // a single shuffle op.
    // Can not handle scalable type, the number of elements needed to create
    // shuffle mask is not a compile-time constant.
    if (isa<ScalableVectorType>(InsElt.getType()))
      return nullptr;
    unsigned NumElts =
        cast<FixedVectorType>(InsElt.getType())->getNumElements();

    uint64_t InsertIdx[2];
    Constant *Val[2];
    if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
        !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
        !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
        !match(IEI->getOperand(1), m_Constant(Val[1])))
      return nullptr;

    SmallVector<Constant *, 16> Values(NumElts);
    SmallVector<int, 16> Mask(NumElts);
    auto ValI = std::begin(Val);
    // Generate new constant vector and mask.
    // We have 2 values/masks from the insertelements instructions. Insert them
    // into new value/mask vectors.
    for (uint64_t I : InsertIdx) {
      if (!Values[I]) {
        Values[I] = *ValI;
        Mask[I] = NumElts + I;
      }
      ++ValI;
    }
    // Remaining values are filled with 'poison' values.
    for (unsigned I = 0; I < NumElts; ++I) {
      if (!Values[I]) {
        Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
        Mask[I] = I;
      }
    }
    // Create new operands for a shuffle that includes the constant of the
    // original insertelt.
    return new ShuffleVectorInst(IEI->getOperand(0),
                                 ConstantVector::get(Values), Mask);
  }
  return nullptr;
}

/// If both the base vector and the inserted element are extended from the same
/// type, do the insert element in the narrow source type followed by extend.
/// TODO: This can be extended to include other cast opcodes, but particularly
///       if we create a wider insertelement, make sure codegen is not harmed.
static Instruction *narrowInsElt(InsertElementInst &InsElt,
                                 InstCombiner::BuilderTy &Builder) {
  // We are creating a vector extend. If the original vector extend has another
  // use, that would mean we end up with 2 vector extends, so avoid that.
  // TODO: We could ease the use-clause to "if at least one op has one use"
  //       (assuming that the source types match - see next TODO comment).
  Value *Vec = InsElt.getOperand(0);
  if (!Vec->hasOneUse())
    return nullptr;

  Value *Scalar = InsElt.getOperand(1);
  Value *X, *Y;
  CastInst::CastOps CastOpcode;
  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
    CastOpcode = Instruction::FPExt;
  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
    CastOpcode = Instruction::SExt;
  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
    CastOpcode = Instruction::ZExt;
  else
    return nullptr;

  // TODO: We can allow mismatched types by creating an intermediate cast.
  if (X->getType()->getScalarType() != Y->getType())
    return nullptr;
  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
}

/// If we are inserting 2 halves of a value into adjacent elements of a vector,
/// try to convert to a single insert with appropriate bitcasts.
static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
                                        bool IsBigEndian,
                                        InstCombiner::BuilderTy &Builder) {
  // Pattern depends on endian because we expect lower index is inserted first.
  // Big endian:
  // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
  // Little endian:
  // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
  // Note: It is not safe to do this transform with an arbitrary base vector
  //       because the bitcast of that vector to fewer/larger elements could
  //       allow poison to spill into an element that was not poison before.
  // TODO: Detect smaller fractions of the scalar.
  // TODO: One-use checks are conservative.
  auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
  Value *Scalar0, *BaseVec;
  uint64_t Index0, Index1;
  if (!VTy || (VTy->getNumElements() & 1) ||
      !match(InsElt.getOperand(2), m_ConstantInt(Index1)) ||
      !match(InsElt.getOperand(0),
             m_OneUse(m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
                                  m_ConstantInt(Index0)))))
    return nullptr;

  // The first insert must be to the index one less than this one, and
  // the first insert must be to an even index.
  if (Index0 + 1 != Index1 || Index0 & 1)
    return nullptr;

  // For big endian, the high half of the value should be inserted first.
  // For little endian, the low half of the value should be inserted first.
  Value *X;
  uint64_t ShAmt;
  if (IsBigEndian) {
    if (!match(Scalar0, m_Trunc(m_LShr(m_Value(X), m_ConstantInt(ShAmt)))) ||
        !match(InsElt.getOperand(1), m_Trunc(m_Specific(X))))
      return nullptr;
  } else {
    if (!match(Scalar0, m_Trunc(m_Value(X))) ||
        !match(InsElt.getOperand(1),
               m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
      return nullptr;
  }

  Type *SrcTy = X->getType();
  unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
  unsigned VecEltWidth = VTy->getScalarSizeInBits();
  if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
    return nullptr;

  // Bitcast the base vector to a vector type with the source element type.
  Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
  Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);

  // Scale the insert index for a vector with half as many elements.
  // bitcast (inselt (bitcast BaseVec), X, NewIndex)
  uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
  Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
  return new BitCastInst(NewInsert, VTy);
}
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
  Value *VecOp = IE.getOperand(0);
  Value *ScalarOp = IE.getOperand(1);
  Value *IdxOp = IE.getOperand(2);

  if (auto *V = simplifyInsertElementInst(VecOp, ScalarOp, IdxOp,
                                          SQ.getWithInstruction(&IE)))
    return replaceInstUsesWith(IE, V);

  // Canonicalize type of constant indices to i64 to simplify CSE
  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(IE, 2, NewIdx);

    Value *BaseVec, *OtherScalar;
    uint64_t OtherIndexVal;
    if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
                                          m_Value(OtherScalar),
                                          m_ConstantInt(OtherIndexVal)))) &&
        !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
      Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
      return InsertElementInst::Create(NewIns, OtherScalar,
                                       Builder.getInt64(OtherIndexVal));
    }
  }
  // If the scalar is bitcast and inserted into undef, do the insert in the
  // source type followed by bitcast.
  // TODO: Generalize for insert into any constant, not just undef?
  Value *ScalarSrc;
  if (match(VecOp, m_Undef()) &&
      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
      (ScalarSrc->getType()->isIntegerTy() ||
       ScalarSrc->getType()->isFloatingPointTy())) {
    // inselt undef, (bitcast ScalarSrc), IdxOp -->
    //   bitcast (inselt undef, ScalarSrc, IdxOp)
    Type *ScalarTy = ScalarSrc->getType();
    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
    Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
                                                 : UndefValue::get(VecTy);
    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
    return new BitCastInst(NewInsElt, IE.getType());
  }

  // If the vector and scalar are both bitcast from the same element type, do
  // the insert in that source type followed by bitcast.
  Value *VecSrc;
  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
      match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
      (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
      VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
      cast<VectorType>(VecSrc->getType())->getElementType() ==
          ScalarSrc->getType()) {
    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
    return new BitCastInst(NewInsElt, IE.getType());
  }

  // If the inserted element was extracted from some other fixed-length vector
  // and both indexes are valid constants, try to turn this into a shuffle.
  // Can not handle scalable vector type, the number of elements needed to
  // create shuffle mask is not a compile-time constant.
  uint64_t InsertedIdx, ExtractedIdx;
  Value *ExtVecOp;
  if (isa<FixedVectorType>(IE.getType()) &&
      match(IdxOp, m_ConstantInt(InsertedIdx)) &&
      match(ScalarOp,
            m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
      isa<FixedVectorType>(ExtVecOp->getType()) &&
      ExtractedIdx <
          cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
    // TODO: Looking at the user(s) to determine if this insert is a
    // fold-to-shuffle opportunity does not match the usual instcombine
    // constraints. We should decide if the transform is worthy based only
    // on this instruction and its operands, but that may not work currently.
    //
    // Here, we are trying to avoid creating shuffles before reaching
    // the end of a chain of extract-insert pairs. This is complicated because
    // we do not generally form arbitrary shuffle masks in instcombine
    // (because those may codegen poorly), but collectShuffleElements() does
    // exactly that.
    //
    // The rules for determining what is an acceptable target-independent
    // shuffle mask are fuzzy because they evolve based on the backend's
    // capabilities and real-world impact.
    auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
      if (!Insert.hasOneUse())
        return true;
      auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
      if (!InsertUser)
        return true;
      return false;
    };

    // Try to form a shuffle from a chain of extract-insert ops.
    if (isShuffleRootCandidate(IE)) {
      bool Rerun = true;
      while (Rerun) {
        Rerun = false;

        SmallVector<int, 16> Mask;
        ShuffleOps LR =
            collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);

        // The proposed shuffle may be trivial, in which case we shouldn't
        // perform the combine.
        if (LR.first != &IE && LR.second != &IE) {
          // We now have a shuffle of LHS, RHS, Mask.
          if (LR.second == nullptr)
            LR.second = PoisonValue::get(LR.first->getType());
          return new ShuffleVectorInst(LR.first, LR.second, Mask);
        }
      }
    }
  }

  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
    unsigned VWidth = VecTy->getNumElements();
    APInt PoisonElts(VWidth, 0);
    APInt DemandedElts(APInt::getAllOnes(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(&IE, DemandedElts, PoisonElts)) {
      if (V != &IE)
        return replaceInstUsesWith(IE, V);
      return &IE;
    }
  }

  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
    return Shuf;

  // ...

  return nullptr;
}

/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
                                unsigned Depth = 5) {
  // We can always reorder the elements of a constant.
  if (isa<Constant>(V))
    return true;

  // We won't reorder vector arguments. No IPO here.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Two users may expect different orders of the elements. Don't try it.
  if (!I->hasOneUse())
    return false;

  if (Depth == 0)
    return false;

  switch (I->getOpcode()) {
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    // Propagating an undefined shuffle mask element to integer div/rem is not
    // allowed because those opcodes can create immediate undefined behavior
    // from an undefined element in an operand.
    if (llvm::is_contained(Mask, -1))
      return false;
    [[fallthrough]];
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::GetElementPtr: {
    // Bail out if we would create longer vector ops. We could allow creating
    // longer vector ops, but that may result in more expensive codegen.
    Type *ITy = I->getType();
    if (ITy->isVectorTy() &&
        Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
      return false;
    for (Value *Operand : I->operands()) {
      if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
        return false;
    }
    return true;
  }
  case Instruction::InsertElement: {
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!CI)
      return false;
    int ElementNumber = CI->getLimitedValue();

    // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
    // can't put an element into multiple indices.
    bool SeenOnce = false;
    for (int I : Mask) {
      if (I == ElementNumber) {
        if (SeenOnce)
          return false;
        SeenOnce = true;
      }
    }
    return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
  }
  default:
    return false;
  }
}
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of type mismatch, the type of the operands is correct.
static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
                       IRBuilderBase &Builder) {
  Builder.SetInsertPoint(I);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    assert(NewOps.size() == 2 && "binary operator with #ops != 2");
    Value *New = Builder.CreateBinOp(BO->getOpcode(),
                                     NewOps[0], NewOps[1]);
    if (auto *NewI = dyn_cast<Instruction>(New)) {
      if (isa<OverflowingBinaryOperator>(BO)) {
        NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
        NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
      }
      if (isa<PossiblyExactOperator>(BO)) {
        NewI->setIsExact(BO->isExact());
      }
      if (isa<FPMathOperator>(BO))
        NewI->copyFastMathFlags(I);
    }
    return New;
  }
  case Instruction::ICmp:
    assert(NewOps.size() == 2 && "icmp with #ops != 2");
    return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
                              NewOps[1]);
  case Instruction::FCmp:
    assert(NewOps.size() == 2 && "fcmp with #ops != 2");
    return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
                              NewOps[1]);
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt: {
    // It's possible that the mask has a different number of elements from
    // the original cast. We recompute the destination type to match the mask.
    Type *DestTy = VectorType::get(
        I->getType()->getScalarType(),
        cast<VectorType>(NewOps[0]->getType())->getElementCount());
    assert(NewOps.size() == 1 && "cast with #ops != 1");
    return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
                              DestTy);
  }
  case Instruction::GetElementPtr: {
    Value *Ptr = NewOps[0];
    ArrayRef<Value*> Idx = NewOps.slice(1);
    return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
                             Ptr, Idx, "",
                             cast<GEPOperator>(I)->isInBounds());
  }
  }
  llvm_unreachable("failed to rebuild vector instructions");
}
static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
                                              IRBuilderBase &Builder) {
  // Mask.size() does not need to be equal to the number of vector elements.

  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
  Type *EltTy = V->getType()->getScalarType();

  if (isa<PoisonValue>(V))
    return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));

  if (match(V, m_Undef()))
    return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));

  if (isa<ConstantAggregateZero>(V))
    return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));

  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
                                          Mask);

  Instruction *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::Select:
  case Instruction::GetElementPtr: {
    SmallVector<Value*, 8> NewOps;
    bool NeedsRebuild =
        (Mask.size() !=
         cast<FixedVectorType>(I->getType())->getNumElements());
    for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
      Value *V;
      // Recursively call evaluateInDifferentElementOrder on vector arguments
      // as well. E.g. GetElementPtr may have scalar operands even if the
      // return value is a vector, so we need to examine the operand type.
      if (I->getOperand(i)->getType()->isVectorTy())
        V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
      else
        V = I->getOperand(i);
      NewOps.push_back(V);
      NeedsRebuild |= (V != I->getOperand(i));
    }

    if (NeedsRebuild)
      return buildNew(I, NewOps, Builder);
    return I;
  }
  case Instruction::InsertElement: {
    int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();

    // The insertelement was inserting at Element. Figure out which element
    // that becomes after shuffling. The answer is guaranteed to be unique
    // by CanEvaluateShuffled.
    bool Found = false;
    int Index = 0;
    for (int e = Mask.size(); Index != e; ++Index) {
      if (Mask[Index] == Element) {
        Found = true;
        break;
      }
    }
    // If element is not in Mask, no need to handle the operand 1 (element to
    // be inserted). Just evaluate values in operand 0 according to Mask.
    if (!Found)
      return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);

    Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
                                               Builder);
    return Builder.CreateInsertElement(V, I->getOperand(1), Index);
  }
  }
  llvm_unreachable("failed to reorder elements of vector instruction!");
}

// Returns true if the shuffle is extracting a contiguous range of values from
// LHS, for example:
//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
//   Shuffles to:  |EE|FF|GG|HH|
//                 +--+--+--+--+
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
                                       ArrayRef<int> Mask) {
  unsigned LHSElems =
      cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
  unsigned MaskElems = Mask.size();
  unsigned BegIdx = Mask.front();
  unsigned EndIdx = Mask.back();
  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
    return false;
  for (unsigned I = 0; I != MaskElems; ++I)
    if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
      return false;
  return true;
}
/// These are the ingredients in an alternate form binary operator as described
/// below.
struct BinopElts {
  BinaryOperator::BinaryOps Opcode;
  Value *Op0;
  Value *Op1;
  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
            Value *V0 = nullptr, Value *V1 = nullptr) :
      Opcode(Opc), Op0(V0), Op1(V1) {}
  operator bool() const { return Opcode != 0; }
};
/// Binops may be transformed into binops with different opcodes and operands.
/// Reverse the usual canonicalization to enable folds with the non-canonical
/// form of the binop. If a transform is possible, return the elements of the
/// new binop. If not, return invalid elements.
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
  Type *Ty = BO->getType();
  switch (BO->getOpcode()) {
  case Instruction::Shl: {
    // shl X, C --> mul X, (1 << C)
    Constant *C;
    if (match(BO1, m_ImmConstant(C))) {
      Constant *ShlOne = ConstantFoldBinaryOpOperands(
          Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
      assert(ShlOne && "Constant folding of immediate constants failed");
      return {Instruction::Mul, BO0, ShlOne};
    }
    break;
  }
  case Instruction::Or: {
    // or disjoint X, C --> add X, C
    if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
      return {Instruction::Add, BO0, BO1};
    break;
  }
  case Instruction::Sub:
    // sub 0, X --> mul X, -1
    if (match(BO0, m_ZeroInt()))
      return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
    break;
  default:
    break;
  }
  return {};
}

/// A select shuffle of a select shuffle with a shared operand can be reduced
/// to a single select shuffle. This is an obvious improvement in IR, and the
/// backend is expected to lower select shuffles efficiently.
static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  SmallVector<int, 16> Mask;
  Shuf.getShuffleMask(Mask);
  unsigned NumElts = Mask.size();
  // Canonicalize a select shuffle with common operand as Op1.
  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
  if (ShufOp && ShufOp->isSelect() &&
      (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
    std::swap(Op0, Op1);
    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
  }

  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
  if (!ShufOp || !ShufOp->isSelect() ||
      (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
    return nullptr;

  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
  SmallVector<int, 16> Mask1;
  ShufOp->getShuffleMask(Mask1);
  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");

  // Canonicalize common operand (Op0) as X (first operand of first shuffle).
  if (Y == Op0) {
    std::swap(X, Y);
    ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
  }

  // If the mask chooses from X (operand 0), it stays the same.
  // If the mask chooses from the earlier shuffle, the other mask value is
  // transferred to the combined select shuffle:
  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
  SmallVector<int, 16> NewMask(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];

  // A select mask with undef elements might look like an identity mask.
  assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
          ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
         "Unexpected shuffle mask");
  return new ShuffleVectorInst(X, Y, NewMask);
}
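// Illustrative example: shuf X, (shuf X, Y, <0,5,2,7>), <0,5,2,7> reduces to
// the single select shuffle shuf X, Y, <0,5,2,7>, since each RHS lane of the
// outer mask forwards the corresponding lane of the inner select shuffle.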
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
                                                const SimplifyQuery &SQ) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  // Are we shuffling together some value and that same value after it has been
  // modified by a binop with a constant?
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  Constant *C;
  bool Op0IsBinop;
  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
    Op0IsBinop = true;
  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
    Op0IsBinop = false;
  else
    return nullptr;

  // The identity constant for a binop leaves a variable operand unchanged. For
  // a vector, this is a splat of something like 0, -1, or 1.
  // If there's no identity constant for this binop, we're done.
  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
  if (!IdC)
    return nullptr;

  Value *X = Op0IsBinop ? Op1 : Op0;

  // Prevent folding in the case the non-binop operand might have NaN values.
  // If X can have NaN elements then we have that the floating point math
  // operation in the transformed code may not preserve the exact NaN
  // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
  // This makes the transformation incorrect since the original program would
  // have preserved the exact NaN bit-pattern.
  // Avoid the folding if X can have NaN elements.
  if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
      !isKnownNeverNaN(X, SQ))
    return nullptr;

  // Shuffle identity constants into the lanes that return the original value.
  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
  // The existing binop constant vector remains in the same operand position.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
                                ConstantExpr::getShuffleVector(IdC, C, Mask);

  bool MightCreatePoisonOrUB =
      is_contained(Mask, PoisonMaskElem) &&
      (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
  if (MightCreatePoisonOrUB)
    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2264// shuf (bop X, C), X, M --> bop X, C' 2265// shuf X, (bop X, C), M --> bop X, C' 2269// An undef shuffle mask element may propagate as an undef constant element in 2270// the new binop. That would produce poison where the original code might not. 2271// If we already made a safe constant, then there's no danger. 2277/// If we have an insert of a scalar to a non-zero element of an undefined 2278/// vector and then shuffle that value, that's the same as inserting to the zero 2279/// element and shuffling. Splatting from the zero element is recognized as the 2280/// canonical form of splat. 2288// Match a shuffle that is a splat to a non-zero element. 2294// Insert into element 0 of a poison vector. 2298// Splat from element 0. Any mask element that is poison remains poison. 2300// shuf (inselt poison, X, 2), _, <2,2,undef> 2301// --> shuf (inselt poison, X, 0), poison, <0,0,undef> 2302unsigned NumMaskElts =
2303 cast<FixedVectorType>(Shuf.
getType())->getNumElements();
2305for (
unsigned i = 0; i != NumMaskElts; ++i)
2307 NewMask[i] = Mask[i];
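// Worked example of the canonicalization above (illustrative values only):
//   %ins = insertelement <4 x float> poison, float %f, i64 2
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
// becomes the canonical zero-element splat:
//   %ins0 = insertelement <4 x float> poison, float %f, i64 0
//   %splat = shufflevector <4 x float> %ins0, <4 x float> poison, <4 x i32> zeroinitializer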
/// Try to fold shuffles that are the equivalent of a vector select.
Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
  if (!Shuf.isSelect())
    return nullptr;

  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
  // Commuting undef to operand 0 conflicts with another canonicalization.
  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
  if (!match(Shuf.getOperand(1), m_Undef()) &&
      Shuf.getMaskValue(0) >= (int)NumElts) {
    // TODO: Can we assert that both operands of a shuffle-select are not undef
    // (otherwise, it would have been folded by instsimplify)?
    Shuf.commute();
    return &Shuf;
  }

  if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
    return I;

  if (Instruction *I = foldSelectShuffleWith1Binop(
          Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
    return I;

  BinaryOperator *B0, *B1;
  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
      !match(Shuf.getOperand(1), m_BinOp(B1)))
    return nullptr;

  // If one operand is "0 - X", allow that to be viewed as "X * -1"
  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
  // with a multiply, we will exit because C0/C1 will not be set.
  Value *X, *Y;
  Constant *C0 = nullptr, *C1 = nullptr;
  bool ConstantsAreOp1;
  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
      match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
    ConstantsAreOp1 = false;
  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
                                 m_Neg(m_Value(X)))) &&
           match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
                                 m_Neg(m_Value(Y)))))
    ConstantsAreOp1 = true;
  else
    return nullptr;

  // We need matching binops to fold the lanes together.
  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
  bool DropNSW = false;
  if (ConstantsAreOp1 && Opc0 != Opc1) {
    // TODO: We drop "nsw" if shift is converted into multiply because it may
    // not be correct when the shift amount is BitWidth - 1. We could examine
    // each vector element to determine if it is safe to keep that flag.
    if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
      DropNSW = true;
    if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
      assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
      Opc0 = AltB0.Opcode;
      C0 = cast<Constant>(AltB0.Op1);
    } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
      assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
      Opc1 = AltB1.Opcode;
      C1 = cast<Constant>(AltB1.Op1);
    }
  }

  if (Opc0 != Opc1 || !C0 || !C1)
    return nullptr;

  // The opcodes must be the same. Use a new name to make that clear.
  BinaryOperator::BinaryOps BOpc = Opc0;

  // Select the constant elements needed for the single binop.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);

  // We are moving a binop after a shuffle. When a shuffle has an undefined
  // mask element, the result is undefined, but it is not poison or undefined
  // behavior. That is not necessarily true for div/rem/shift.
  bool MightCreatePoisonOrUB =
      is_contained(Mask, PoisonMaskElem) &&
      (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
  if (MightCreatePoisonOrUB)
    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
                                                       ConstantsAreOp1);

  Value *V;
  if (X == Y) {
    // Remove a binop and the shuffle by rearranging the constant:
    // shuffle (op V, C0), (op V, C1), M --> op V, C'
    // shuffle (op C0, V), (op C1, V), M --> op C', V
    V = X;
  } else {
    // If there are 2 different variable operands, we must create a new shuffle
    // (select) first, so check uses to ensure that we don't end up with more
    // instructions than we started with.
    if (!B0->hasOneUse() && !B1->hasOneUse())
      return nullptr;

    // If we use the original shuffle mask and op1 is *variable*, we would be
    // putting an undef into operand 1 of div/rem/shift. This is either UB or
    // poison. We do not have to guard against UB when *constants* are op1
    // because safe constants guarantee that we do not overflow sdiv/srem (and
    // there's no danger for other opcodes).
    // TODO: To allow this case, create a new shuffle mask with no undefs.
    if (MightCreatePoisonOrUB && !ConstantsAreOp1)
      return nullptr;

    // Note: In general, we do not create new shuffles in InstCombine because
    // we do not know if a target can lower an arbitrary shuffle optimally. In
    // this case, the shuffle uses the existing mask, so there is no additional
    // risk.

    // Select the variable vectors first, then perform the binop:
    // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
    // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
    V = Builder.CreateShuffleVector(X, Y, Mask);
  }

  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
                                   Builder.CreateBinOp(BOpc, NewC, V);

  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
  // 1. If we changed an opcode, poison conditions might have changed.
  // 2. If the shuffle had undef mask elements, the new binop might have undefs
  //    where the original code did not. But if we already made a safe constant,
  //    then there's no danger.
  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
    NewI->copyIRFlags(B0);
    NewI->andIRFlags(B1);
    if (DropNSW)
      NewI->setHasNoSignedWrap(false);
    if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
      NewI->dropPoisonGeneratingFlags();
  }
  return replaceInstUsesWith(Shuf, NewBO);
}
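// Worked example of the X == Y path above (illustrative values only): both
// sides are 'mul' by a constant of the same variable, so the shuffle just
// selects constant lanes.
//   %b0 = mul <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
//   %b1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
//   %s = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// becomes a single binop on the shuffled constant:
//   %s = mul <4 x i32> %x, <i32 1, i32 6, i32 3, i32 8>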
/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
/// Example (little endian):
/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
                                     bool IsBigEndian) {
  // This must be a bitcasted shuffle of 1 vector integer operand.
  Type *DestType = Shuf.getType();
  Value *X;
  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
      !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
    return nullptr;

  // The source type must have the same number of elements as the shuffle,
  // and the source element type must be larger than the shuffle element type.
  Type *SrcType = X->getType();
  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
      cast<FixedVectorType>(SrcType)->getNumElements() !=
          cast<FixedVectorType>(DestType)->getNumElements() ||
      SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
    return nullptr;

  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
         "Expected a shuffle that decreases length");

  // Last, check that the mask chooses the correct low bits for each narrow
  // element in the result.
  uint64_t TruncRatio =
      SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] == PoisonMaskElem)
      continue;
    uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
    assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
    if (Mask[i] != (int)LSBIndex)
      return nullptr;
  }

  return new TruncInst(X, DestType);
}
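// Big-endian counterpart of the little-endian example in the comment above
// (illustrative): the low byte of each i16 lands at the odd byte positions,
// so the matching mask is <1, 3, 5, 7>:
//   shuf (bitcast <4 x i16> X to <8 x i8>), <1, 3, 5, 7> --> trunc X to <4 x i8>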
/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
/// narrowing (concatenating with poison and extracting back to the original
/// length). This allows replacing the wide select with a narrow select.
static Instruction *narrowVectorSelectArg(ShuffleVectorInst &Shuf,
                                          InstCombiner::BuilderTy &Builder) {
  // This must be a narrowing identity shuffle. It extracts the 1st N elements
  // of the 1st vector operand of a shuffle.
  if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
    return nullptr;

  // The vector being shuffled must be a vector select that we can eliminate.
  // TODO: The one-use requirement could be eased if X and/or Y are constants.
  Value *Cond, *X, *Y;
  if (!match(Shuf.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
    return nullptr;

  // We need a narrow condition value. It must be extended with poison elements
  // and have the same number of elements as this shuffle.
  unsigned NarrowNumElts =
      cast<FixedVectorType>(Shuf.getType())->getNumElements();
  Value *NarrowCond;
  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
      cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
          NarrowNumElts ||
      !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
    return nullptr;

  // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
  // -->
  // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}

/// Canonicalize FP negate/abs after shuffle.
static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
                                          InstCombiner::BuilderTy &Builder) {
  auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
  Value *X;
  if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
    return nullptr;

  bool IsFNeg = S0->getOpcode() == Instruction::FNeg;

  // Match 1-input (unary) shuffle.
  // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
  if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
    Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
    if (IsFNeg)
      return UnaryOperator::CreateFNegFMF(NewShuf, S0);

    Function *FAbs = Intrinsic::getOrInsertDeclaration(
        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
    CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
    NewF->setFastMathFlags(S0->getFastMathFlags());
    return NewF;
  }

  // Match 2-input (binary) shuffle.
  auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
  Value *Y;
  if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
      S0->getOpcode() != S1->getOpcode() ||
      (!S0->hasOneUse() && !S1->hasOneUse()))
    return nullptr;

  // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
  Instruction *NewF;
  if (IsFNeg) {
    NewF = UnaryOperator::CreateFNeg(NewShuf);
  } else {
    Function *FAbs = Intrinsic::getOrInsertDeclaration(
        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
    NewF = CallInst::Create(FAbs, {NewShuf});
  }
  NewF->copyIRFlags(S0);
  NewF->andIRFlags(S1);
  return NewF;
}
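// Worked example of the 2-input case above (illustrative values only):
//   %n0 = fneg <4 x float> %x
//   %n1 = fneg <4 x float> %y
//   %s = shufflevector <4 x float> %n0, <4 x float> %n1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes one fneg after the shuffle:
//   %s2 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s = fneg <4 x float> %s2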
/// Canonicalize casts after shuffle.
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
                                    InstCombiner::BuilderTy &Builder) {
  // Do we have 2 matching cast operands?
  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
  if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
      Cast0->getSrcTy() != Cast1->getSrcTy())
    return nullptr;

  // TODO: Allow other opcodes? That would require easing the type restrictions
  //       below here.
  CastInst::CastOps CastOpcode = Cast0->getOpcode();
  switch (CastOpcode) {
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break;
  default:
    return nullptr;
  }

  VectorType *ShufTy = Shuf.getType();
  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());

  // TODO: Allow length-increasing shuffles?
  if (ShufTy->getElementCount().getKnownMinValue() >
      ShufOpTy->getElementCount().getKnownMinValue())
    return nullptr;

  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
         "Expected fixed vector operands for casts and binary shuffle");
  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
    return nullptr;

  // At least one of the operands must have only one use (the shuffle).
  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
    return nullptr;

  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
  Value *X = Cast0->getOperand(0);
  Value *Y = Cast1->getOperand(0);
  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
}
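// Worked example of the cast canonicalization above (illustrative values):
//   %c0 = sitofp <4 x i32> %x to <4 x float>
//   %c1 = sitofp <4 x i32> %y to <4 x float>
//   %s = shufflevector <4 x float> %c0, <4 x float> %c1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// becomes one cast after the shuffle:
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
//   %s = sitofp <4 x i32> %s2 to <4 x float>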
/// Try to fold an extract subvector operation.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
    return nullptr;

  // Check if we are extracting all bits of an inserted scalar:
  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
  Value *X;
  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
      X->getType()->getPrimitiveSizeInBits() ==
          Shuf.getType()->getPrimitiveSizeInBits())
    return new BitCastInst(X, Shuf.getType());

  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
  Value *Y;
  ArrayRef<int> Mask;
  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
    return nullptr;

  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
  // then combining may result in worse codegen.
  if (!Op0->hasOneUse())
    return nullptr;

  // We are extracting a subvector from a shuffle. Remove excess elements from
  // the 1st shuffle mask to eliminate the extract.
  //
  // This transform is conservatively limited to identity extracts because we
  // do not allow arbitrary shuffle mask creation as a target-independent
  // transform (because we can't guarantee that will lower efficiently).
  //
  // If the extracting shuffle has a poison mask element, it transfers to the
  // new shuffle mask. Otherwise, copy the original mask element. Example:
  // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
  // shuf X, Y, <C0, poison, C2, poison>
  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumElts);
  assert(NumElts < Mask.size() &&
         "Identity with extract must have less elements than its inputs");

  for (unsigned i = 0; i != NumElts; ++i) {
    int ExtractMaskElt = Shuf.getMaskValue(i);
    int MaskElt = Mask[i];
    NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
  }
  return new ShuffleVectorInst(X, Y, NewMask);
}
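// Concrete instance of the comment's example above (illustrative values):
//   %inner = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %sub = shufflevector <4 x i32> %inner, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// The identity extract is absorbed by shortening the first mask:
//   %sub = shufflevector <4 x i32> %x, <4 x i32> %y, <2 x i32> <i32 0, i32 5>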
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
                                          InstCombinerImpl &IC) {
  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
  SmallVector<int, 16> Mask;
  Shuf.getShuffleMask(Mask);

  int NumElts = Mask.size();
  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();

  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
  // not be able to handle it there if the insertelement has >1 use.
  // If the shuffle has an insertelement operand but does not choose the
  // inserted scalar element from that value, then we can replace that shuffle
  // operand with the source vector of the insertelement.
  Value *X;
  uint64_t IdxC;
  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
    // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
    if (!is_contained(Mask, (int)IdxC))
      return IC.replaceOperand(Shuf, 0, X);
  }
  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
    // Offset the index constant by the vector width because we are checking
    // for accesses to the 2nd vector input of the shuffle.
    IdxC += InpNumElts;
    // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
    if (!is_contained(Mask, (int)IdxC))
      return IC.replaceOperand(Shuf, 1, X);
  }
  // For the rest of the transform, the shuffle must not change vector sizes.
  // TODO: This restriction could be removed if the insert has only one use
  //       (because the transform would require a new length-changing shuffle).
  if (NumElts != InpNumElts)
    return nullptr;

  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
    // We need an insertelement with a constant index.
    if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
                               m_ConstantInt(IndexC))))
      return false;

    // Test the shuffle mask to see if it splices the inserted scalar into the
    // operand 1 vector of the shuffle.
    int NewInsIndex = -1;
    for (int i = 0; i != NumElts; ++i) {
      // Ignore undef mask elements.
      if (Mask[i] == -1)
        continue;

      // The shuffle takes elements of operand 1 without lane changes.
      if (Mask[i] == NumElts + i)
        continue;

      // The shuffle must choose the inserted scalar exactly once.
      if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
        return false;

      // The shuffle is placing the inserted scalar into element i.
      NewInsIndex = i;
    }

    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");

    // Index is updated to the potentially translated insertion lane.
    IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
    return true;
  };

  // If the shuffle is unnecessary, insert the scalar operand directly into
  // operand 1 of the shuffle. Example:
  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
  Value *Scalar;
  ConstantInt *IndexC;
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
    return InsertElementInst::Create(V1, Scalar, IndexC);

  // Try again after commuting shuffle. Example:
  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
  std::swap(V0, V1);
  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
    return InsertElementInst::Create(V1, Scalar, IndexC);

  return nullptr;
}
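// Concrete instance of the first example above (illustrative values): mask
// element 0 selects the scalar inserted at index 1, and elements 5..7 take
// lanes 1..3 of %v1 unchanged, so the whole shuffle is one insert:
//   %ins = insertelement <4 x i32> %a, i32 %s, i64 1
//   %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
// becomes
//   %shuf = insertelement <4 x i32> %v1, i32 %s, i64 0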
static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
  // Match the operands as identity with padding (also known as concatenation
  // with undef) shuffles of the same source type. The backend is expected to
  // recreate these concatenations from a shuffle of narrow operands.
  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
    return nullptr;

  // We limit this transform to power-of-2 types because we expect that the
  // backend can convert the simplified IR patterns to identical nodes as the
  // original IR.
  // TODO: If we can verify the same behavior for arbitrary types, the
  //       power-of-2 checks can be removed.
  Value *X = Shuffle0->getOperand(0);
  Value *Y = Shuffle1->getOperand(0);
  if (X->getType() != Y->getType() ||
      !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
      !isPowerOf2_32(
          cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
      !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
      match(X, m_Undef()) || match(Y, m_Undef()))
    return nullptr;
  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
         isa<UndefValue>(Shuffle1->getOperand(1)) &&
         "Unexpected operand for identity shuffle");

  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
  // operands directly by adjusting the shuffle mask to account for the
  // narrower types:
  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");

  ArrayRef<int> Mask = Shuf.getShuffleMask();
  SmallVector<int, 16> NewMask(Mask.size(), -1);
  for (int i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] == -1)
      continue;

    // If this shuffle is choosing an undef element from 1 of the sources, that
    // element is undef.
    if (Mask[i] < WideElts) {
      if (Shuffle0->getMaskValue(Mask[i]) == -1)
        continue;
    } else {
      if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
        continue;
    }

    // If this shuffle is choosing from the 1st narrow op, the mask element is
    // the same. If this shuffle is choosing from the 2nd narrow op, the mask
    // element is offset down to adjust for the narrow vector widths.
    if (Mask[i] < WideElts) {
      assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
      NewMask[i] = Mask[i];
    } else {
      assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
      NewMask[i] = Mask[i] - (WideElts - NarrowElts);
    }
  }
  return new ShuffleVectorInst(X, Y, NewMask);
}
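// Worked example of the identity-with-padding fold above (illustrative):
//   %w0 = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %w1 = shufflevector <2 x i32> %y, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %s = shufflevector <4 x i32> %w0, <4 x i32> %w1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// With WideElts = 4 and NarrowElts = 2, mask elements 4 and 5 are offset down
// by 2, so the widening shuffles disappear:
//   %s = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>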
// Splatting the first element of the result of a BinOp, where any of the
// BinOp's operands are the result of a first element splat, can be simplified
// to splatting the first element of the result of the BinOp.
Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
  if (!match(SVI.getOperand(1), m_Poison()) ||
      !match(SVI.getShuffleMask(), m_ZeroMask()) ||
      !SVI.getOperand(0)->hasOneUse())
    return nullptr;

  Value *Op0 = SVI.getOperand(0);
  Value *X, *Y;
  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
                          m_Value(Y))) &&
      !match(Op0, m_BinOp(m_Value(X),
                          m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
    return nullptr;
  if (X->getType() != Y->getType())
    return nullptr;

  auto *BinOp = cast<BinaryOperator>(Op0);
  if (!isSafeToSpeculativelyExecute(BinOp))
    return nullptr;

  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
  if (auto NewBOI = dyn_cast<Instruction>(NewBO))
    NewBOI->copyIRFlags(BinOp);

  return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
}
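// Worked example of the splat-of-binop simplification above (illustrative):
//   %xs = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %add = add <4 x i32> %xs, %y
//   %s = shufflevector <4 x i32> %add, <4 x i32> poison, <4 x i32> zeroinitializer
// Lane 0 of %add depends only on lane 0 of %x and %y, so this becomes:
//   %add2 = add <4 x i32> %x, %y
//   %s = shufflevector <4 x i32> %add2, <4 x i32> poison, <4 x i32> zeroinitializer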
Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  Value *LHS = SVI.getOperand(0);
  Value *RHS = SVI.getOperand(1);
  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
                                          SVI.getType(), ShufQuery))
    return replaceInstUsesWith(SVI, V);

  if (Instruction *I = simplifyBinOpSplats(SVI))
    return I;

  // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
  // order to support scalable vectors.
  ArrayRef<int> Mask = SVI.getShuffleMask();
  if (!isa<PoisonValue>(RHS) &&
      all_of(Mask, [](int M) { return M == 0 || M == PoisonMaskElem; }))
    return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));

  if (isa<ScalableVectorType>(SVI.getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();

  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
  //
  // if X and Y are of the same (vector) type, and the element size is not
  // changed by the bitcasts, we can distribute the bitcasts through the
  // shuffle, hopefully reducing the number of instructions. We make sure that
  // at least one bitcast only has one use, so we don't *increase* the number
  // of instructions here.
  Value *X, *Y;
  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
      X->getType()->isVectorTy() && X->getType() == Y->getType() &&
      X->getType()->getScalarSizeInBits() ==
          SVI.getType()->getScalarSizeInBits() &&
      (LHS->hasOneUse() || RHS->hasOneUse())) {
    Value *V = Builder.CreateShuffleVector(X, Y, Mask, SVI.getName() + ".bc");
    return new BitCastInst(V, SVI.getType());
  }

  // Peek through a bitcasted shuffle operand by scaling the mask. If the
  // simulated shuffle can simplify, then this shuffle is unnecessary:
  // shuf (bitcast X), undef, Mask --> bitcast X'
  // TODO: This could be extended to allow length-changing shuffles.
  //       The transform might also be obsoleted if we allowed canonicalization
  //       of bitcasted shuffles.
  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
      X->getType()->isVectorTy() && VWidth == LHSWidth) {
    // Try to create a scaled mask constant.
    auto *XType = cast<FixedVectorType>(X->getType());
    unsigned XNumElts = XType->getNumElements();
    SmallVector<int, 16> ScaledMask;
    if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
      // If the shuffled source vector simplifies, cast that value to this
      // shuffle's type.
      if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
                                              ScaledMask, XType, ShufQuery))
        return new BitCastInst(V, SVI.getType());
    }
  }

  // shuffle x, x, mask --> shuffle x, undef, mask'
  if (LHS == RHS) {
    assert(!match(RHS, m_Undef()) &&
           "Shuffle with 2 undef ops not simplified?");
    return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
  }

  // shuffle undef, x, mask --> shuffle x, undef, mask'
  if (match(LHS, m_Undef())) {
    SVI.commute();
    return &SVI;
  }

  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
    return I;

  if (Instruction *I = foldSelectShuffle(SVI))
    return I;

  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
    return I;

  if (Instruction *I = narrowVectorSelectArg(SVI, Builder))
    return I;

  if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
    return I;

  if (Instruction *I = foldCastShuffle(SVI, Builder))
    return I;

  APInt PoisonElts(VWidth, 0);
  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
    if (V != &SVI)
      return replaceInstUsesWith(SVI, V);
    return &SVI;
  }

  if (Instruction *I = foldIdentityExtractShuffle(SVI))
    return I;

  // These transforms have the potential to lose undef knowledge, so they are
  // intentionally placed after SimplifyDemandedVectorElts().
  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
    return I;
  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
    return I;

  if (auto *SI = dyn_cast<SelectInst>(LHS)) {
    // We cannot do this fold for elementwise select since ShuffleVector is
    // not elementwise.
    if (SI->getCondition()->getType()->isIntegerTy() &&
        (isa<PoisonValue>(RHS) ||
         isGuaranteedNotToBePoison(SI->getCondition())))
      if (Instruction *I = FoldOpIntoSelect(SVI, SI))
        return I;
  }
  if (auto *PN = dyn_cast<PHINode>(LHS)) {
    if (Instruction *I = foldOpIntoPhi(SVI, PN))
      return I;
  }
  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
  // a non-vector type. We can instead bitcast the original vector followed by
  // an extract of the desired element:
  //
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %1 = bitcast <4 x i8> %sroa to i32
  // Becomes:
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 0
  //
  // If the shuffle is extracting a contiguous range of values from the input
  // vector then each use which is a bitcast of the extracted size can be
  // replaced. This will work if the vector types are compatible, and the begin
  // index is aligned to a value in the casted vector type. If the begin index
  // isn't aligned then we can shuffle the original vector (keeping the same
  // vector type) before extracting.
  //
  // This code will bail out if the target type is fundamentally incompatible
  // with vectors of the source type.
  //
  // Example of <16 x i8>, target type i32:
  // Index range [4,8):         v-----------v Will work.
  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
  //     <4 x i32>: |           |           |           |           |
  //                +-----------+-----------+-----------+-----------+
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
  // Target type i40:           ^--------------^ Won't work, bail.
  bool MadeChange = false;
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
    Value *V = LHS;
    unsigned MaskElems = Mask.size();
    auto *SrcTy = cast<FixedVectorType>(V->getType());
    unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
    unsigned SrcNumElems = SrcTy->getNumElements();
    SmallVector<BitCastInst *, 8> BCs;
    DenseMap<Type *, Value *> NewBCs;
    for (User *U : SVI.users())
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
        if (!BC->use_empty())
          // Only visit bitcasts that weren't previously handled.
          BCs.push_back(BC);
    for (BitCastInst *BC : BCs) {
      unsigned BegIdx = Mask.front();
      Type *TgtTy = BC->getDestTy();
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
      if (!TgtElemBitWidth)
        continue;
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
      if (!VecBitWidthsEqual)
        continue;
      if (!VectorType::isValidElementType(TgtTy))
        continue;
      auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
      if (!BegIsAligned) {
        // Shuffle the input so [0,NumElements) contains the output, and
        // [NumElems,SrcNumElems) is undef.
        SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
          ShuffleMask[I] = Idx;
        V = Builder.CreateShuffleVector(V, ShuffleMask,
                                        SVI.getName() + ".extract");
        BegIdx = 0;
      }
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
      assert(SrcElemsPerTgtElem);
      BegIdx /= SrcElemsPerTgtElem;
      auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
      if (Inserted)
        It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
      Value *Ext = Builder.CreateExtractElement(It->second, BegIdx,
                                                SVI.getName() + ".extract");
      // The shufflevector isn't being replaced: the bitcast that used it
      // is. InstCombine will visit the newly-created instructions.
      replaceInstUsesWith(*BC, Ext);
      MadeChange = true;
    }
  }

  // If the LHS is a shufflevector itself, see if we can combine it with this
  // one without producing an unusual shuffle.
  // Cases that might be simplified:
  // 1.
  // x1=shuffle(v1,v2,mask1)
  //  x=shuffle(x1,undef,mask)
  //        ==>
  //  x=shuffle(v1,undef,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  // 2.
  // x1=shuffle(v1,undef,mask1)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == mask1.size()
  //        ==>
  //  x=shuffle(v1,x2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  // 3.
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v2.size() == mask2.size()
  //        ==>
  //  x=shuffle(x1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  // 4.
  // x1=shuffle(v1,undef,mask1)
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == v2.size()
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  //
  // Here we are really conservative:
  // we are absolutely afraid of producing a shuffle mask not in the input
  // program, because the code gen may not be smart enough to turn a merged
  // shuffle into two specific shuffles: it may produce worse code. As such,
  // we only merge two shuffles if the result is either a splat or one of the
  // input shuffle masks. In this case, merging the shuffles just removes
  // one instruction, which we know is safe. This is good for things like
  // turning: (splat(splat)) -> splat, or
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
  ShuffleVectorInst *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  ShuffleVectorInst *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
  if (LHSShuffle)
    if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
        !match(RHS, m_Poison()))
      LHSShuffle = nullptr;
  if (RHSShuffle)
    if (!match(RHSShuffle->getOperand(1), m_Poison()))
      RHSShuffle = nullptr;
  if (!LHSShuffle && !RHSShuffle)
    return MadeChange ? &SVI : nullptr;
  Value *LHSOp0 = nullptr;
  Value *LHSOp1 = nullptr;
  Value *RHSOp0 = nullptr;
  unsigned LHSOp0Width = 0;
  unsigned RHSOp0Width = 0;
  if (LHSShuffle) {
    LHSOp0 = LHSShuffle->getOperand(0);
    LHSOp1 = LHSShuffle->getOperand(1);
    LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
  }
  if (RHSShuffle) {
    RHSOp0 = RHSShuffle->getOperand(0);
    RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
  }
  Value *newLHS = LHS;
  Value *newRHS = RHS;
  if (LHSShuffle) {
    // case 1
    if (match(RHS, m_Poison())) {
      newLHS = LHSOp0;
      newRHS = LHSOp1;
    }
    // case 2 or 4
    else if (LHSOp0Width == LHSWidth) {
      newLHS = LHSOp0;
    }
  }
  // case 3 or 4
  if (RHSShuffle && RHSOp0Width == LHSWidth) {
    newRHS = RHSOp0;
  }
  // case 4
  if (LHSOp0 == RHSOp0) {
    newLHS = LHSOp0;
    newRHS = nullptr;
  }

  if (newLHS == LHS && newRHS == RHS)
    return MadeChange ? &SVI : nullptr;

  SmallVector<int, 16> LHSMask;
  SmallVector<int, 16> RHSMask;
  if (newLHS != LHS)
    LHSShuffle->getShuffleMask(LHSMask);
  if (RHSShuffle && newRHS != RHS)
    RHSShuffle->getShuffleMask(RHSMask);

  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  SmallVector<int, 16> newMask;
  bool isSplat = true;
  int SplatElt = -1;
  // Create a new mask for the new ShuffleVectorInst so that the new
  // ShuffleVectorInst is equivalent to the original one.
  for (unsigned i = 0; i < VWidth; ++i) {
    int eltMask;
    if (Mask[i] < 0) {
      // This element is a poison value.
      eltMask = -1;
    } else if (Mask[i] < (int)LHSWidth) {
      // This element is from left hand side vector operand.
      //
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
      // new mask value for the element.
      if (newLHS != LHS) {
        eltMask = LHSMask[Mask[i]];
        // If the value selected is a poison value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
          eltMask = -1;
      } else
        eltMask = Mask[i];
    } else {
      // This element is from right hand side vector operand.
      //
      // If the value selected is a poison value, explicitly specify it
      // with a -1 mask value. (case 1)
      if (match(RHS, m_Poison()))
        eltMask = -1;
      // If RHS is going to be replaced (case 3 or 4), calculate the
      // new mask value for the element.
      else if (newRHS != RHS) {
        eltMask = RHSMask[Mask[i] - LHSWidth];
        // If the value selected is a poison value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)RHSOp0Width) {
          assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
                 "should have been check above");
          eltMask = -1;
        }
      } else
        eltMask = Mask[i] - LHSWidth;

      // If LHS's width is changed, shift the mask value accordingly.
      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
      // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
      // obfuscation across the two vectors.
      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
        eltMask += newLHSWidth;
    }

    // Check if this could still be a splat.
    if (eltMask >= 0) {
      if (SplatElt >= 0 && SplatElt != eltMask)
        isSplat = false;
      SplatElt = eltMask;
    }

    newMask.push_back(eltMask);
  }

  // If the result mask is equal to one of the original shuffle masks,
  // or is a splat, do the replacement.
  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    if (!newRHS)
      newRHS = PoisonValue::get(newLHS->getType());
    return new ShuffleVectorInst(newLHS, newRHS, newMask);
  }

  return MadeChange ? &SVI : nullptr;
}
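// Worked example of the conservative merge above, the splat(splat) case from
// the comment (illustrative values):
//   %x1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %x = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// The merged mask <3,3,3,3> is itself a splat, so the pair is replaced:
//   %x = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>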