LLVM 20.0.0git
InstCombineVectorOps.cpp
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector, because
    // for a scalable vector the actual size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpPredicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}
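
// Illustrative example (hypothetical IR, with assumed values): a one-use
// binop with a constant operand is considered cheap to scalarize above:
//
//   %v = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>  ; single use
//   %e = extractelement <4 x i32> %v, i64 1
//
// Scalarizing yields an extract of %x at index 1 followed by a scalar add.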

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(), scalarPHI, Op,
                                                B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}
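
// Illustrative sketch (hypothetical IR) of the PHI scalarization above:
//
//   %vphi = phi <4 x float> [ %init, %entry ], [ %next, %loop ]
//   %next = fadd <4 x float> %vphi, %step
//   %e    = extractelement <4 x float> %vphi, i64 0
//
// becomes a scalar PHI feeding a scalar fadd whose other operand is
// "extractelement %step, 0"; every extract of %vphi at index 0 is then
// replaced by the scalar PHI.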

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if ((!ShiftAmountC ||
         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
        Ext.getVectorOperand()->hasOneUse()) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from
    // one of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove the insertelement if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved into SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}
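
// Illustrative sketch (hypothetical IR) of the scalar-integer case above,
// on a little-endian target:
//
//   %vec = bitcast i32 %x to <4 x i8>
//   %elt = extractelement <4 x i8> %vec, i64 2
// -->
//   %off = lshr i32 %x, 16
//   %elt = trunc i32 %off to i8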

/// Find elements of V demanded by UserInstr.
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}
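
// Illustrative example (hypothetical IR): for the single user
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 0, i32 3>
// the function above reports that %v's demanded elements are {0, 3},
// i.e. the bit pattern 0b1001.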

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}
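
// Illustrative example (hypothetical IR) of the canonicalization above:
//   extractelement <4 x i32> %v, i16 1  -->  extractelement <4 x i32> %v, i64 1
// so otherwise-identical extracts CSE regardless of the index's bit width.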

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If, for instance, the extractelement would try to do an out-of-bounds
  // access because of the values of %c1 and/or %c2, the sequence could be
  // optimized early. This is currently not possible because constant folding
  // will reach an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize the type of constant indices to i64 to simplify CSE.
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector the actual size is only known at run
      // time.
      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return the index when its value does not exceed the allowed limit
        // for the element type of the vector; otherwise return poison.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: Come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpPredicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. So if both are constants here, they must not be
      // the same value; extract from the pre-inserted vector instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out
      // where it came from and extract from the appropriate input element
      // instead. Restrict the following transformation to fixed-length
      // vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}
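
// Illustrative sketch (hypothetical IR) of the shufflevector case above:
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %e = extractelement <4 x i32> %s, i64 1
// -->
//   %e = extractelement <4 x i32> %b, i64 1   ; mask element 5 is %b[1]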

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}
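
// Illustrative example (hypothetical IR): with LHS = %a and RHS = %b
// (both <4 x i32>), for
//   %e = extractelement <4 x i32> %b, i64 0
//   %t = insertelement <4 x i32> %a, i32 %e, i64 2
// the function above collects the mask <0, 1, 4, 3>: lanes taken from
// LHS keep indices 0-3, lanes taken from RHS use indices 4-7.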

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst->getIterator());
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}
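
// Illustrative sketch (hypothetical IR) of the widening above, going from a
// <2 x i8> source into a <4 x i8> destination:
//   %wide = shufflevector <2 x i8> %narrow, <2 x i8> poison,
//                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// Extracts from %narrow in the same block are rewritten to extract from
// %wide, so the surrounding insert chain can later become a single shuffle.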

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or nullptr),
/// and set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted-from or inserted-into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}
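
// Illustrative example (hypothetical IR) of a chain the collector above
// turns into a single shuffle:
//   %e0 = extractelement <4 x i32> %b, i64 0
//   %t0 = insertelement <4 x i32> %a, i32 %e0, i64 3
// yields the pair (%a, %b) with mask <0, 1, 2, 4>.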

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted to see if they were all extractvalue'd from
/// the same source aggregate at the exact same element indices.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang's C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value for each element of the aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or we can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                 ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of the aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the type from which the value was extracted didn't
    /// match the aggregate type into which it was inserted,
    /// or the extraction/insertion indices mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
  bool EltDefinedInUseBB = false;

  // Given the value \p Elt that was being inserted into element \p EltIdx of
  // an aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB) {
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
      if (Elt && Elt->getParent() == *UseBB)
        EltDefinedInUseBB = true;
    }
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI, see if
  // we can find an appropriate source aggregate for each of the elements, and
  // check that it is the same aggregate for every element. If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) !=
                 AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found a source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, pass through whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found a source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look in which blocks each of the AggElts is being
  // defined; they should all be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find the source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) == AggregateDescription::Found) {
      FoundSrcAgg = true;
      IV.first->second = *SourceAggregate;
    } else {
      // If UseBB is the single successor of Pred, we can add InsertValue to
      // Pred.
      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!BI || !BI->isUnconditional())
        return nullptr;
    }
  }

  if (!FoundSrcAgg)
    return nullptr;

  // Do some sanity checks if we need to add insertvalue into predecessors.
  auto OrigBB = OrigIVI.getParent();
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    // Element is defined in UseBB, so it can't be used in predecessors.
    if (EltDefinedInUseBB)
      return nullptr;

    // Performing this transformation across a loop boundary may create a dead
    // loop, so we should avoid that situation. But LoopInfo is not generally
    // available; we must be conservative here.
    // If OrigIVI is in UseBB and UseBB is the only successor of PredBB, PredBB
    // can't be in an inner loop.
    if (UseBB != OrigBB)
      return nullptr;

    // Avoid constructing a constant aggregate, because keeping the constant
    // values separate may expose more optimizations.
    bool ConstAgg = true;
    for (auto Val : AggElts) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
      if (!isa<Constant>(Elt)) {
        ConstAgg = false;
        break;
      }
    }
    if (ConstAgg)
      return nullptr;
  }

  // For predecessors without an appropriate source aggregate, create one in
  // the predecessor.
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    BasicBlock *Pred = It.first;
    Builder.SetInsertPoint(Pred->getTerminator());
    Value *V = PoisonValue::get(AggTy);
    for (auto [Idx, Val] : enumerate(AggElts)) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
      V = Builder.CreateInsertValue(V, Elt, Idx);
    }

    It.second = V;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}
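
// Illustrative example (hypothetical IR) of the simplest reuse case above:
//   %e0 = extractvalue { i8, i32 } %agg, 0
//   %e1 = extractvalue { i8, i32 } %agg, 1
//   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
//   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
// Every element comes from %agg at a matching index, so %i1 is simply
// replaced by %agg.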

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}
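
// Illustrative example (hypothetical IR): this shuffle is equivalent to a
// select because every lane keeps its position:
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// Lane i chooses either i (from %a) or i + 4 (from %b); no lane crossing.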

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an infinite loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses,
    // except for the root insertelement instruction, which can be re-used, if
    // it inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a
  // splat and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}
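
// Illustrative sketch (hypothetical IR) of the splat fold above:
//   %i0 = insertelement <4 x i32> poison, i32 %k, i64 0
//   %i1 = insertelement <4 x i32> %i0, i32 %k, i64 1
//   %i2 = insertelement <4 x i32> %i1, i32 %k, i64 2
//   %i3 = insertelement <4 x i32> %i2, i32 %k, i64 3
// -->
//   %i0 = insertelement <4 x i32> poison, i32 %k, i64 0
//   %i3 = shufflevector <4 x i32> %i0, <4 x i32> poison,
//                       <4 x i32> zeroinitializer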

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle has a scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle has a scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}
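
// A hypothetical sketch of the hoist (names invented for illustration):
//   %i = insertelement <2 x i32> %v, i32 %y, i64 0
//   %j = insertelement <2 x i32> %i, i32 7, i64 1
// becomes
//   %h = insertelement <2 x i32> %v, i32 7, i64 1
//   %j = insertelement <2 x i32> %h, i32 %y, i64 0
// If %v is itself a constant vector, the new constant insert folds away,
// leaving just one insertelement.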

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
  // Bail out if the parent has more than one use. In that case, we'd be
  // replacing the insertelt with a shuffle, and that's not a clear win.
  if (!Inst || !Inst->hasOneUse())
    return nullptr;
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
    // The shuffle must have a constant vector operand. The insertelt must have
    // a constant scalar being inserted at a constant position in the vector.
    Constant *ShufConstVec, *InsEltScalar;
    uint64_t InsEltIndex;
    if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
        !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
        !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
      return nullptr;

    // Adding an element to an arbitrary shuffle could be expensive, but a
    // shuffle that selects elements from vectors without crossing lanes is
    // assumed cheap.
    // If we're just adding a constant into that shuffle, it will still be
    // cheap.
    if (!isShuffleEquivalentToSelect(*Shuf))
      return nullptr;

    // From the above 'select' check, we know that the mask has the same number
    // of elements as the vector input operands. We also know that each constant
    // input element is used in its lane and cannot be used more than once by
    // the shuffle. Therefore, replace the constant in the shuffle's constant
    // vector with the insertelt constant. Replace the constant in the shuffle's
    // mask vector with the insertelt index plus the length of the vector
    // (because the constant vector operand of a shuffle is always the 2nd
    // operand).
    ArrayRef<int> Mask = Shuf->getShuffleMask();
    unsigned NumElts = Mask.size();
    SmallVector<Constant *, 16> NewShufElts(NumElts);
    SmallVector<int, 16> NewMaskElts(NumElts);
    for (unsigned I = 0; I != NumElts; ++I) {
      if (I == InsEltIndex) {
        NewShufElts[I] = InsEltScalar;
        NewMaskElts[I] = InsEltIndex + NumElts;
      } else {
        // Copy over the existing values.
        NewShufElts[I] = ShufConstVec->getAggregateElement(I);
        NewMaskElts[I] = Mask[I];
      }

      // Bail if we failed to find an element.
      if (!NewShufElts[I])
        return nullptr;
    }

    // Create new operands for a shuffle that includes the constant of the
    // original insertelt. The old shuffle will be dead now.
    return new ShuffleVectorInst(Shuf->getOperand(0),
                                 ConstantVector::get(NewShufElts), NewMaskElts);
  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
    // Transform sequences of insertelement ops with constant data/indexes into
    // a single shuffle op.
    // Cannot handle scalable type: the number of elements needed to create a
    // shuffle mask is not a compile-time constant.
    if (isa<ScalableVectorType>(InsElt.getType()))
      return nullptr;
    unsigned NumElts =
        cast<FixedVectorType>(InsElt.getType())->getNumElements();

    uint64_t InsertIdx[2];
    Constant *Val[2];
    if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
        !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
        !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
        !match(IEI->getOperand(1), m_Constant(Val[1])))
      return nullptr;
    SmallVector<Constant *, 16> Values(NumElts);
    SmallVector<int, 16> Mask(NumElts);
    auto ValI = std::begin(Val);
    // Generate new constant vector and mask.
    // We have 2 values/masks from the insertelement instructions. Insert them
    // into new value/mask vectors.
    for (uint64_t I : InsertIdx) {
      if (!Values[I]) {
        Values[I] = *ValI;
        Mask[I] = NumElts + I;
      }
      ++ValI;
    }
    // Remaining values are filled with 'poison' values.
    for (unsigned I = 0; I < NumElts; ++I) {
      if (!Values[I]) {
        Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
        Mask[I] = I;
      }
    }
    // Create new operands for a shuffle that includes the constant of the
    // original insertelt.
    return new ShuffleVectorInst(IEI->getOperand(0),
                                 ConstantVector::get(Values), Mask);
  }
  return nullptr;
}
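
// A sketch of the shuffle arm above (hypothetical, 4 elements): inserting
// constant C' at index 1 of a select-equivalent shuffle
//   inselt (shuf X, <C0,C1,C2,C3>, <0,5,2,7>), C', 1
// rewrites lane 1 of the constant operand and keeps the mask pointing at it:
//   shuf X, <C0,C',C2,C3>, <0,5,2,7>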

/// If both the base vector and the inserted element are extended from the same
/// type, do the insert element in the narrow source type followed by extend.
/// TODO: This can be extended to include other cast opcodes, but particularly
///       if we create a wider insertelement, make sure codegen is not harmed.
static Instruction *narrowInsElt(InsertElementInst &InsElt,
                                 InstCombiner::BuilderTy &Builder) {
  // We are creating a vector extend. If the original vector extend has another
  // use, that would mean we end up with 2 vector extends, so avoid that.
  // TODO: We could ease the one-use check to "if at least one op has one use"
  //       (assuming that the source types match - see next TODO comment).
  Value *Vec = InsElt.getOperand(0);
  if (!Vec->hasOneUse())
    return nullptr;

  Value *Scalar = InsElt.getOperand(1);
  Value *X, *Y;
  CastInst::CastOps CastOpcode;
  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
    CastOpcode = Instruction::FPExt;
  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
    CastOpcode = Instruction::SExt;
  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
    CastOpcode = Instruction::ZExt;
  else
    return nullptr;

  // TODO: We can allow mismatched types by creating an intermediate cast.
  if (X->getType()->getScalarType() != Y->getType())
    return nullptr;

  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
}
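
// A hypothetical sketch of the narrowing (assuming matching i16 sources):
//   %v = sext <4 x i16> %x to <4 x i32>
//   %e = sext i16 %y to i32
//   %r = insertelement <4 x i32> %v, i32 %e, i64 0
// becomes a narrow insert followed by a single extend:
//   %n = insertelement <4 x i16> %x, i16 %y, i64 0
//   %r = sext <4 x i16> %n to <4 x i32>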

/// If we are inserting 2 halves of a value into adjacent elements of a vector,
/// try to convert to a single insert with appropriate bitcasts.
static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
                                        bool IsBigEndian,
                                        InstCombiner::BuilderTy &Builder) {
  Value *VecOp = InsElt.getOperand(0);
  Value *ScalarOp = InsElt.getOperand(1);
  Value *IndexOp = InsElt.getOperand(2);

  // Pattern depends on endian because we expect lower index is inserted first.
  // Big endian:
  // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
  // Little endian:
  // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
  // Note: It is not safe to do this transform with an arbitrary base vector
  //       because the bitcast of that vector to fewer/larger elements could
  //       allow poison to spill into an element that was not poison before.
  // TODO: Detect smaller fractions of the scalar.
  // TODO: One-use checks are conservative.
  auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
  Value *Scalar0, *BaseVec;
  uint64_t Index0, Index1;
  if (!VTy || (VTy->getNumElements() & 1) ||
      !match(IndexOp, m_ConstantInt(Index1)) ||
      !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
                                m_ConstantInt(Index0))) ||
      !match(BaseVec, m_Undef()))
    return nullptr;

  // The first insert must be to the index one less than this one, and
  // the first insert must be to an even index.
  if (Index0 + 1 != Index1 || Index0 & 1)
    return nullptr;

  // For big endian, the high half of the value should be inserted first.
  // For little endian, the low half of the value should be inserted first.
  Value *X;
  uint64_t ShAmt;
  if (IsBigEndian) {
    if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
        !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
      return nullptr;
  } else {
    if (!match(Scalar0, m_Trunc(m_Value(X))) ||
        !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
      return nullptr;
  }

  Type *SrcTy = X->getType();
  unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
  unsigned VecEltWidth = VTy->getScalarSizeInBits();
  if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
    return nullptr;

  // Bitcast the base vector to a vector type with the source element type.
  Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
  Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);

  // Scale the insert index for a vector with half as many elements.
  // bitcast (inselt (bitcast BaseVec), X, NewIndex)
  uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
  Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
  return new BitCastInst(NewInsert, VTy);
}
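
// Little-endian sketch (hypothetical IR, an i64 split into two i32 halves
// inserted at the even/odd lane pair 2 and 3):
//   %lo = trunc i64 %x to i32
//   %sh = lshr i64 %x, 32
//   %hi = trunc i64 %sh to i32
//   %v0 = insertelement <4 x i32> undef, i32 %lo, i64 2
//   %v1 = insertelement <4 x i32> %v0, i32 %hi, i64 3
// becomes one insert at the scaled index (2 / 2 == 1) through bitcasts:
//   %bc = bitcast <4 x i32> undef to <2 x i64>
//   %in = insertelement <2 x i64> %bc, i64 %x, i64 1
//   %v1 = bitcast <2 x i64> %in to <4 x i32>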

Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
  Value *VecOp = IE.getOperand(0);
  Value *ScalarOp = IE.getOperand(1);
  Value *IdxOp = IE.getOperand(2);

  if (auto *V = simplifyInsertElementInst(
          VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
    return replaceInstUsesWith(IE, V);

  // Canonicalize type of constant indices to i64 to simplify CSE.
  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(IE, 2, NewIdx);

    Value *BaseVec, *OtherScalar;
    uint64_t OtherIndexVal;
    if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
                                          m_Value(OtherScalar),
                                          m_ConstantInt(OtherIndexVal)))) &&
        !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
      Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
      return InsertElementInst::Create(NewIns, OtherScalar,
                                       Builder.getInt64(OtherIndexVal));
    }
  }

  // If the scalar is bitcast and inserted into undef, do the insert in the
  // source type followed by bitcast.
  // TODO: Generalize for insert into any constant, not just undef?
  Value *ScalarSrc;
  if (match(VecOp, m_Undef()) &&
      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
      (ScalarSrc->getType()->isIntegerTy() ||
       ScalarSrc->getType()->isFloatingPointTy())) {
    // inselt undef, (bitcast ScalarSrc), IdxOp -->
    //   bitcast (inselt undef, ScalarSrc, IdxOp)
    Type *ScalarTy = ScalarSrc->getType();
    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
    Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
                                                 : UndefValue::get(VecTy);
    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
    return new BitCastInst(NewInsElt, IE.getType());
  }

  // If the vector and scalar are both bitcast from the same element type, do
  // the insert in that source type followed by bitcast.
  Value *VecSrc;
  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
      match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
      (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
      VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
      cast<VectorType>(VecSrc->getType())->getElementType() ==
          ScalarSrc->getType()) {
    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
    return new BitCastInst(NewInsElt, IE.getType());
  }

  // If the inserted element was extracted from some other fixed-length vector
  // and both indexes are valid constants, try to turn this into a shuffle.
  // Cannot handle scalable vector type: the number of elements needed to
  // create a shuffle mask is not a compile-time constant.
  uint64_t InsertedIdx, ExtractedIdx;
  Value *ExtVecOp;
  if (isa<FixedVectorType>(IE.getType()) &&
      match(IdxOp, m_ConstantInt(InsertedIdx)) &&
      match(ScalarOp,
            m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
      isa<FixedVectorType>(ExtVecOp->getType()) &&
      ExtractedIdx <
          cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
    // TODO: Looking at the user(s) to determine if this insert is a
    // fold-to-shuffle opportunity does not match the usual instcombine
    // constraints. We should decide if the transform is worthy based only
    // on this instruction and its operands, but that may not work currently.
    //
    // Here, we are trying to avoid creating shuffles before reaching
    // the end of a chain of extract-insert pairs. This is complicated because
    // we do not generally form arbitrary shuffle masks in instcombine
    // (because those may codegen poorly), but collectShuffleElements() does
    // exactly that.
    //
    // The rules for determining what is an acceptable target-independent
    // shuffle mask are fuzzy because they evolve based on the backend's
    // capabilities and real-world impact.
    auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
      if (!Insert.hasOneUse())
        return true;
      auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
      if (!InsertUser)
        return true;
      return false;
    };

    // Try to form a shuffle from a chain of extract-insert ops.
    if (isShuffleRootCandidate(IE)) {
      bool Rerun = true;
      while (Rerun) {
        Rerun = false;

        SmallVector<int, 16> Mask;
        ShuffleOps LR =
            collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);

        // The proposed shuffle may be trivial, in which case we shouldn't
        // perform the combine.
        if (LR.first != &IE && LR.second != &IE) {
          // We now have a shuffle of LHS, RHS, Mask.
          if (LR.second == nullptr)
            LR.second = PoisonValue::get(LR.first->getType());
          return new ShuffleVectorInst(LR.first, LR.second, Mask);
        }
      }
    }
  }

  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
    unsigned VWidth = VecTy->getNumElements();
    APInt PoisonElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
                                              PoisonElts)) {
      if (V != &IE)
        return replaceInstUsesWith(IE, V);
      return &IE;
    }
  }

  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
    return Shuf;

  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
    return NewInsElt;

  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
    return Broadcast;

  if (Instruction *Splat = foldInsEltIntoSplat(IE))
    return Splat;

  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
    return IdentityShuf;

  if (Instruction *Ext = narrowInsElt(IE, Builder))
    return Ext;

  if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
    return Ext;

  return nullptr;
}

/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
                                unsigned Depth = 5) {
  // We can always reorder the elements of a constant.
  if (isa<Constant>(V))
    return true;

  // We won't reorder vector arguments. No IPO here.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Two users may expect different orders of the elements. Don't try it.
  if (!I->hasOneUse())
    return false;

  if (Depth == 0) return false;

  switch (I->getOpcode()) {
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    // Propagating an undefined shuffle mask element to integer div/rem is not
    // allowed because those opcodes can create immediate undefined behavior
    // from an undefined element in an operand.
    if (llvm::is_contained(Mask, -1))
      return false;
    [[fallthrough]];
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::GetElementPtr: {
    // Bail out if we would create longer vector ops. We could allow creating
    // longer vector ops, but that may result in more expensive codegen.
    Type *ITy = I->getType();
    if (ITy->isVectorTy() &&
        Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
      return false;
    for (Value *Operand : I->operands()) {
      if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
        return false;
    }
    return true;
  }
  case Instruction::InsertElement: {
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!CI) return false;
    int ElementNumber = CI->getLimitedValue();

    // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
    // can't put an element into multiple indices.
    bool SeenOnce = false;
    for (int I : Mask) {
      if (I == ElementNumber) {
        if (SeenOnce)
          return false;
        SeenOnce = true;
      }
    }
    return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
  }
  }
  return false;
}
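
// For instance (a hypothetical rejection): reordering the operands of
//   %r = urem <2 x i8> %x, <i8 3, i8 5>
// with mask <1, -1> would require materializing a poison lane in the
// divisor, and 'urem ..., poison' is immediate UB, so the div/rem opcodes
// above refuse masks containing -1.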

/// Rebuild a new instruction just like 'I' but with the new operands given.
/// If the new operands imply a different type (e.g., a changed vector
/// length), the rebuilt instruction takes its type from the new operands.
static Value *buildNew(Instruction *I, ArrayRef<Value *> NewOps,
                       IRBuilderBase &Builder) {
  Builder.SetInsertPoint(I);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    assert(NewOps.size() == 2 && "binary operator with #ops != 2");
    Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
                                     NewOps[0], NewOps[1]);
    if (auto *NewI = dyn_cast<Instruction>(New)) {
      if (isa<OverflowingBinaryOperator>(BO)) {
        NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
        NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
      }
      if (isa<PossiblyExactOperator>(BO)) {
        NewI->setIsExact(BO->isExact());
      }
      if (isa<FPMathOperator>(BO))
        NewI->copyFastMathFlags(I);
    }
    return New;
  }
  case Instruction::ICmp:
    assert(NewOps.size() == 2 && "icmp with #ops != 2");
    return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
                              NewOps[1]);
  case Instruction::FCmp:
    assert(NewOps.size() == 2 && "fcmp with #ops != 2");
    return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
                              NewOps[1]);
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt: {
    // It's possible that the mask has a different number of elements from
    // the original cast. We recompute the destination type to match the mask.
    Type *DestTy = VectorType::get(
        I->getType()->getScalarType(),
        cast<VectorType>(NewOps[0]->getType())->getElementCount());
    assert(NewOps.size() == 1 && "cast with #ops != 1");
    return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
                              DestTy);
  }
  case Instruction::GetElementPtr: {
    Value *Ptr = NewOps[0];
    ArrayRef<Value *> Idx = NewOps.slice(1);
    return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
                             Ptr, Idx, "",
                             cast<GEPOperator>(I)->isInBounds());
  }
  }
  llvm_unreachable("failed to rebuild vector instructions");
}

static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
                                              IRBuilderBase &Builder) {
  // Mask.size() does not need to be equal to the number of vector elements.

  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
  Type *EltTy = V->getType()->getScalarType();

  if (isa<PoisonValue>(V))
    return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));

  if (match(V, m_Undef()))
    return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));

  if (isa<ConstantAggregateZero>(V))
    return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));

  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
                                          Mask);

  Instruction *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::Select:
  case Instruction::GetElementPtr: {
    SmallVector<Value *, 8> NewOps;
    bool NeedsRebuild =
        (Mask.size() !=
         cast<FixedVectorType>(I->getType())->getNumElements());
    for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
      Value *V;
      // Recursively call evaluateInDifferentElementOrder on vector arguments
      // as well. E.g. GetElementPtr may have scalar operands even if the
      // return value is a vector, so we need to examine the operand type.
      if (I->getOperand(i)->getType()->isVectorTy())
        V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
      else
        V = I->getOperand(i);
      NewOps.push_back(V);
      NeedsRebuild |= (V != I->getOperand(i));
    }
    if (NeedsRebuild)
      return buildNew(I, NewOps, Builder);
    return I;
  }
  case Instruction::InsertElement: {
    int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();

    // The insertelement was inserting at Element. Figure out which element
    // that becomes after shuffling. The answer is guaranteed to be unique
    // by CanEvaluateShuffled.
    bool Found = false;
    int Index = 0;
    for (int e = Mask.size(); Index != e; ++Index) {
      if (Mask[Index] == Element) {
        Found = true;
        break;
      }
    }

    // If element is not in Mask, no need to handle the operand 1 (element to
    // be inserted). Just evaluate values in operand 0 according to Mask.
    if (!Found)
      return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);

    Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
                                               Builder);
    Builder.SetInsertPoint(I);
    return Builder.CreateInsertElement(V, I->getOperand(1), Index);
  }
  }
  llvm_unreachable("failed to reorder elements of vector instruction!");
}
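
// A sketch of the InsertElement case above (hypothetical values): when
//   %v = insertelement <4 x i32> %w, i32 %s, i64 2
// is evaluated with mask <2, 0>, the scalar inserted at index 2 lands in
// lane 0 of the reordered result, so the rebuilt chain becomes
//   %w2 = (operand 0 evaluated with mask <2, 0>)
//   %v2 = insertelement <2 x i32> %w2, i32 %s, i64 0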

// Returns true if the shuffle is extracting a contiguous range of values from
// LHS, for example:
//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
//   Shuffles to:  |EE|FF|GG|HH|
//                 +--+--+--+--+
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
                                       ArrayRef<int> Mask) {
  unsigned LHSElems =
      cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
  unsigned MaskElems = Mask.size();
  unsigned BegIdx = Mask.front();
  unsigned EndIdx = Mask.back();
  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
    return false;
  for (unsigned I = 0; I != MaskElems; ++I)
    if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
      return false;
  return true;
}

/// These are the ingredients in an alternate form binary operator as described
/// below.
struct BinopElts {
  BinaryOperator::BinaryOps Opcode;
  Value *Op0;
  Value *Op1;
  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
            Value *V0 = nullptr, Value *V1 = nullptr) :
      Opcode(Opc), Op0(V0), Op1(V1) {}
  operator bool() const { return Opcode != 0; }
};

/// Binops may be transformed into binops with different opcodes and operands.
/// Reverse the usual canonicalization to enable folds with the non-canonical
/// form of the binop. If a transform is possible, return the elements of the
/// new binop. If not, return invalid elements.
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
  Type *Ty = BO->getType();
  switch (BO->getOpcode()) {
  case Instruction::Shl: {
    // shl X, C --> mul X, (1 << C)
    Constant *C;
    if (match(BO1, m_ImmConstant(C))) {
      Constant *ShlOne = ConstantFoldBinaryOpOperands(
          Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
      assert(ShlOne && "Constant folding of immediate constants failed");
      return {Instruction::Mul, BO0, ShlOne};
    }
    break;
  }
  case Instruction::Or: {
    // or disjoint X, C --> add X, C
    if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
      return {Instruction::Add, BO0, BO1};
    break;
  }
  case Instruction::Sub:
    // sub 0, X --> mul X, -1
    if (match(BO0, m_ZeroInt()))
      return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
    break;
  default:
    break;
  }
  return {};
}
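
// For example (illustrative constants): 'shl X, <i32 3, i32 3>' can be
// treated as 'mul X, <i32 8, i32 8>', and 'sub 0, X' as 'mul X, -1', which
// lets the select-shuffle fold below line up two binops whose opcodes
// differ only in canonical form.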

/// A select shuffle of a select shuffle with a shared operand can be reduced
/// to a single select shuffle. This is an obvious improvement in IR, and the
/// backend is expected to lower select shuffles efficiently.
static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  SmallVector<int, 16> Mask;
  Shuf.getShuffleMask(Mask);
  unsigned NumElts = Mask.size();

  // Canonicalize a select shuffle with common operand as Op1.
  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
  if (ShufOp && ShufOp->isSelect() &&
      (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
    std::swap(Op0, Op1);
    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
  }

  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
  if (!ShufOp || !ShufOp->isSelect() ||
      (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
    return nullptr;

  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
  SmallVector<int, 16> Mask1;
  ShufOp->getShuffleMask(Mask1);
  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");

  // Canonicalize common operand (Op0) as X (first operand of first shuffle).
  if (Y == Op0) {
    std::swap(X, Y);
    ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
  }

  // If the mask chooses from X (operand 0), it stays the same.
  // If the mask chooses from the earlier shuffle, the other mask value is
  // transferred to the combined select shuffle:
  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
  SmallVector<int, 16> NewMask(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];

  // A select mask with undef elements might look like an identity mask.
  assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
          ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
         "Unexpected shuffle mask");
  return new ShuffleVectorInst(X, Y, NewMask);
}
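
// A concrete mask sketch (hypothetical, 4 elements):
//   shuf X, (shuf X, Y, <0,5,2,7>), <0,5,6,3>
// Lanes 0 and 3 take X directly; lanes 1 and 2 read the inner shuffle,
// whose mask values (5 and 2) transfer into the combined mask:
//   shuf X, Y, <0,5,2,3>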

static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
                                                const SimplifyQuery &SQ) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  // Are we shuffling together some value and that same value after it has been
  // modified by a binop with a constant?
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  Constant *C;
  bool Op0IsBinop;
  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
    Op0IsBinop = true;
  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
    Op0IsBinop = false;
  else
    return nullptr;

  // The identity constant for a binop leaves a variable operand unchanged. For
  // a vector, this is a splat of something like 0, -1, or 1.
  // If there's no identity constant for this binop, we're done.
  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
  if (!IdC)
    return nullptr;

  Value *X = Op0IsBinop ? Op1 : Op0;

  // Prevent folding when the non-binop operand might have NaN values: the
  // floating-point operation in the transformed code may not preserve the
  // exact NaN bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN` -- whereas the
  // original program would have preserved it.
  if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
      !isKnownNeverNaN(X, 0, SQ))
    return nullptr;

  // Shuffle identity constants into the lanes that return the original value.
  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
  // The existing binop constant vector remains in the same operand position.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
                                ConstantExpr::getShuffleVector(IdC, C, Mask);

  bool MightCreatePoisonOrUB =
      is_contained(Mask, PoisonMaskElem) &&
      (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
  if (MightCreatePoisonOrUB)
    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);

  // shuf (bop X, C), X, M --> bop X, C'
  // shuf X, (bop X, C), M --> bop X, C'
  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
  NewBO->copyIRFlags(BO);

  // An undef shuffle mask element may propagate as an undef constant element
  // in the new binop. That would produce poison where the original code might
  // not. If we already made a safe constant, then there's no danger.
  if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
    NewBO->dropPoisonGeneratingFlags();
  return NewBO;
}

/// If we have an insert of a scalar to a non-zero element of an undefined
/// vector and then shuffle that value, that's the same as inserting to the
/// zero element and shuffling. Splatting from the zero element is recognized
/// as the canonical form of splat.
static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
                                            InstCombiner::BuilderTy &Builder) {
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Value *X;
  uint64_t IndexC;

  // Match a shuffle that is a splat to a non-zero element.
  if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
                                       m_ConstantInt(IndexC)))) ||
      !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
    return nullptr;

  // Insert into element 0 of a poison vector.
  PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
  Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);

  // Splat from element 0. Any mask element that is poison remains poison.
  // For example:
  // shuf (inselt poison, X, 2), _, <2,2,undef>
  //   --> shuf (inselt poison, X, 0), poison, <0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf.getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts, 0);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    if (Mask[i] == PoisonMaskElem)
      NewMask[i] = Mask[i];

  return new ShuffleVectorInst(NewIns, NewMask);
}

/// Try to fold shuffles that are the equivalent of a vector select.
Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
  if (!Shuf.isSelect())
    return nullptr;

  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
  // Commuting undef to operand 0 conflicts with another canonicalization.
  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
  if (!match(Shuf.getOperand(1), m_Undef()) &&
      Shuf.getMaskValue(0) >= (int)NumElts) {
    // TODO: Can we assert that both operands of a shuffle-select are not undef
    // (otherwise, it would have been folded by instsimplify)?
    Shuf.commute();
    return &Shuf;
  }

  if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
    return I;

  if (Instruction *I = foldSelectShuffleWith1Binop(
          Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
    return I;

  BinaryOperator *B0, *B1;
  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
      !match(Shuf.getOperand(1), m_BinOp(B1)))
    return nullptr;

  // If one operand is "0 - X", allow that to be viewed as "X * -1"
  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
  // with a multiply, we will exit because C0/C1 will not be set.
  Value *X, *Y;
  Constant *C0 = nullptr, *C1 = nullptr;
  bool ConstantsAreOp1;
  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
      match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
    ConstantsAreOp1 = false;
  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
                                 m_Neg(m_Value(X)))) &&
           match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
                                 m_Neg(m_Value(Y)))))
    ConstantsAreOp1 = true;
  else
    return nullptr;

  // We need matching binops to fold the lanes together.
  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
  bool DropNSW = false;
  if (ConstantsAreOp1 && Opc0 != Opc1) {
    // TODO: We drop "nsw" if shift is converted into multiply because it may
    // not be correct when the shift amount is BitWidth - 1. We could examine
    // each vector element to determine if it is safe to keep that flag.
    if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
      DropNSW = true;
    if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
      assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
      Opc0 = AltB0.Opcode;
      C0 = cast<Constant>(AltB0.Op1);
    } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
      assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
      Opc1 = AltB1.Opcode;
      C1 = cast<Constant>(AltB1.Op1);
    }
  }

  if (Opc0 != Opc1 || !C0 || !C1)
    return nullptr;

  // The opcodes must be the same. Use a new name to make that clear.
  BinaryOperator::BinaryOps BOpc = Opc0;

  // Select the constant elements needed for the single binop.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);

  // We are moving a binop after a shuffle. When a shuffle has an undefined
  // mask element, the result is undefined, but it is not poison or undefined
  // behavior. That is not necessarily true for div/rem/shift.
  bool MightCreatePoisonOrUB =
      is_contained(Mask, PoisonMaskElem) &&
      (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
  if (MightCreatePoisonOrUB)
    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
                                                       ConstantsAreOp1);

  Value *V;
  if (X == Y) {
    // Remove a binop and the shuffle by rearranging the constant:
    // shuffle (op V, C0), (op V, C1), M --> op V, C'
    // shuffle (op C0, V), (op C1, V), M --> op C', V
    V = X;
  } else {
    // If there are 2 different variable operands, we must create a new shuffle
    // (select) first, so check uses to ensure that we don't end up with more
    // instructions than we started with.
    if (!B0->hasOneUse() && !B1->hasOneUse())
      return nullptr;

    // If we use the original shuffle mask and op1 is *variable*, we would be
    // putting an undef into operand 1 of div/rem/shift. This is either UB or
    // poison. We do not have to guard against UB when *constants* are op1
    // because safe constants guarantee that we do not overflow sdiv/srem (and
    // there's no danger for other opcodes).
    // TODO: To allow this case, create a new shuffle mask with no undefs.
    if (MightCreatePoisonOrUB && !ConstantsAreOp1)
      return nullptr;

    // Note: In general, we do not create new shuffles in InstCombine because
    // we do not know if a target can lower an arbitrary shuffle optimally. In
    // this case, the shuffle uses the existing mask, so there is no additional
    // risk.

    // Select the variable vectors first, then perform the binop:
    // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
    // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
    V = Builder.CreateShuffleVector(X, Y, Mask);
  }

  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
                                   Builder.CreateBinOp(BOpc, NewC, V);

  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
  // 1. If we changed an opcode, poison conditions might have changed.
  // 2. If the shuffle had undef mask elements, the new binop might have undefs
  //    where the original code did not. But if we already made a safe constant,
  //    then there's no danger.
  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
    NewI->copyIRFlags(B0);
    NewI->andIRFlags(B1);
    if (DropNSW)
      NewI->setHasNoSignedWrap(false);
    if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
      NewI->dropPoisonGeneratingFlags();
  }
  return replaceInstUsesWith(Shuf, NewBO);
}
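
// A minimal end-to-end sketch of this fold (hypothetical IR):
//   %b0 = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
//   %b1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
//   %s  = shufflevector <4 x i32> %b0, <4 x i32> %b1,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// The mask selects constants <1,6,3,8> lane by lane, so the select shuffle
// and one of the adds disappear:
//   %s = add <4 x i32> %x, <i32 1, i32 6, i32 3, i32 8>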

/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
/// Example (little endian):
/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
                                     bool IsBigEndian) {
  // This must be a bitcasted shuffle of 1 vector integer operand.
  Type *DestType = Shuf.getType();
  Value *X;
  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
      !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
    return nullptr;

  // The source type must have the same number of elements as the shuffle,
  // and the source element type must be larger than the shuffle element type.
  Type *SrcType = X->getType();
  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
      cast<FixedVectorType>(SrcType)->getNumElements() !=
          cast<FixedVectorType>(DestType)->getNumElements() ||
      SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
    return nullptr;

  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
         "Expected a shuffle that decreases length");

  // Last, check that the mask chooses the correct low bits for each narrow
  // element in the result.
  uint64_t TruncRatio =
      SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] == PoisonMaskElem)
      continue;
    uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
    assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
    if (Mask[i] != (int)LSBIndex)
      return nullptr;
  }

  return new TruncInst(X, DestType);
}

/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
/// narrowing (concatenating with poison and extracting back to the original
/// length). This allows replacing the wide select with a narrow select.
static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
                                       InstCombiner::BuilderTy &Builder) {
  // This must be a narrowing identity shuffle. It extracts the 1st N elements
  // of the 1st vector operand of a shuffle.
  if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
    return nullptr;

  // The vector being shuffled must be a vector select that we can eliminate.
  // TODO: The one-use requirement could be eased if X and/or Y are constants.
  Value *Cond, *X, *Y;
  if (!match(Shuf.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
    return nullptr;

  // We need a narrow condition value. It must be extended with poison elements
  // and have the same number of elements as this shuffle.
  unsigned NarrowNumElts =
      cast<FixedVectorType>(Shuf.getType())->getNumElements();
  Value *NarrowCond;
  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
      cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
          NarrowNumElts ||
      !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
    return nullptr;

  // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask
  //   -->
  // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
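
// A sketch (hypothetical types): with <4 x i1> %c padded to <8 x i1> %cw
// and a narrowing identity mask <0,1,2,3>:
//   shuf (select %cw, %x, %y), poison, <0,1,2,3>
// becomes
//   select %c, (shuf %x, poison, <0,1,2,3>), (shuf %y, poison, <0,1,2,3>)
// trading the wide select for a narrow one plus two extracting shuffles
// that are expected to lower cheaply.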

/// Canonicalize FP negate/abs after shuffle.
static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
                                          InstCombiner::BuilderTy &Builder) {
  auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
  Value *X;
  if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
    return nullptr;

  bool IsFNeg = S0->getOpcode() == Instruction::FNeg;

  // Match 1-input (unary) shuffle.
  // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
  if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
    Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
    if (IsFNeg)
      return UnaryOperator::CreateFNegFMF(NewShuf, S0);

    Function *FAbs = Intrinsic::getOrInsertDeclaration(
        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
    CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
    NewF->setFastMathFlags(S0->getFastMathFlags());
    return NewF;
  }

  // Match 2-input (binary) shuffle.
  auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
  Value *Y;
  if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
      S0->getOpcode() != S1->getOpcode() ||
      (!S0->hasOneUse() && !S1->hasOneUse()))
    return nullptr;

  // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
  Instruction *NewF;
  if (IsFNeg) {
    NewF = UnaryOperator::CreateFNeg(NewShuf);
  } else {
    Function *FAbs = Intrinsic::getOrInsertDeclaration(
        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
    NewF = CallInst::Create(FAbs, {NewShuf});
  }
  NewF->copyIRFlags(S0);
  NewF->andIRFlags(S1);
  return NewF;
}

/// Canonicalize casts after shuffle.
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
                                    InstCombiner::BuilderTy &Builder) {
  // Do we have 2 matching cast operands?
  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
  if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
      Cast0->getSrcTy() != Cast1->getSrcTy())
    return nullptr;

  // TODO: Allow other opcodes? That would require easing the type restrictions
  //       below here.
  CastInst::CastOps CastOpcode = Cast0->getOpcode();
  switch (CastOpcode) {
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break;
  default:
    return nullptr;
  }

  VectorType *ShufTy = Shuf.getType();
  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());

  // TODO: Allow length-increasing shuffles?
  if (ShufTy->getElementCount().getKnownMinValue() >
      ShufOpTy->getElementCount().getKnownMinValue())
    return nullptr;

  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
         "Expected fixed vector operands for casts and binary shuffle");
  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
    return nullptr;

  // At least one of the operands must have only one use (the shuffle).
  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
    return nullptr;

  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
  Value *X = Cast0->getOperand(0);
  Value *Y = Cast1->getOperand(0);
  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
}

/// Try to fold an extract subvector operation.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
    return nullptr;

  // Check if we are extracting all bits of an inserted scalar:
  // extract-subvec (bitcast (inselt ?, X, 0)) --> bitcast X to subvec type
  Value *X;
  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
      X->getType()->getPrimitiveSizeInBits() ==
          Shuf.getType()->getPrimitiveSizeInBits())
    return new BitCastInst(X, Shuf.getType());

  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
  Value *Y;
  ArrayRef<int> Mask;
  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
    return nullptr;

  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
  // then combining may result in worse codegen.
  if (!Op0->hasOneUse())
    return nullptr;

  // We are extracting a subvector from a shuffle. Remove excess elements from
  // the 1st shuffle mask to eliminate the extract.
  //
  // This transform is conservatively limited to identity extracts because we
  // do not allow arbitrary shuffle mask creation as a target-independent
  // transform (because we can't guarantee that will lower efficiently).
  //
  // If the extracting shuffle has a poison mask element, it transfers to the
  // new shuffle mask. Otherwise, copy the original mask element. Example:
  // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
  // shuf X, Y, <C0, poison, C2, poison>
  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumElts);
  assert(NumElts < Mask.size() &&
         "Identity with extract must have fewer elements than its inputs");

  for (unsigned i = 0; i != NumElts; ++i) {
    int ExtractMaskElt = Shuf.getMaskValue(i);
    int MaskElt = Mask[i];
    NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
  }
  return new ShuffleVectorInst(X, Y, NewMask);
}

/// Try to replace a shuffle with an insertelement or try to replace a shuffle
/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
                                          InstCombinerImpl &IC) {
  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
  SmallVector<int, 16> Mask;
  Shuf.getShuffleMask(Mask);

  int NumElts = Mask.size();
  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();

  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
  // not be able to handle it there if the insertelement has >1 use.
  // If the shuffle has an insertelement operand but does not choose the
  // inserted scalar element from that value, then we can replace that shuffle
  // operand with the source vector of the insertelement.
  Value *X;
  uint64_t IdxC;
  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
    // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
    if (!is_contained(Mask, (int)IdxC))
      return IC.replaceOperand(Shuf, 0, X);
  }
  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
    // Offset the index constant by the vector width because we are checking
    // for accesses to the 2nd vector input of the shuffle.
    IdxC += InpNumElts;
    // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
    if (!is_contained(Mask, (int)IdxC))
      return IC.replaceOperand(Shuf, 1, X);
  }
  // For the rest of the transform, the shuffle must not change vector sizes.
  // TODO: This restriction could be removed if the insert has only one use
  //       (because the transform would require a new length-changing shuffle).
  if (NumElts != InpNumElts)
    return nullptr;

  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
    // We need an insertelement with a constant index.
    if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
                               m_ConstantInt(IndexC))))
      return false;

    // Test the shuffle mask to see if it splices the inserted scalar into the
    // operand 1 vector of the shuffle.
    int NewInsIndex = -1;
    for (int i = 0; i != NumElts; ++i) {
      // Ignore undef mask elements.
      if (Mask[i] == -1)
        continue;

      // The shuffle takes elements of operand 1 without lane changes.
      if (Mask[i] == NumElts + i)
        continue;

      // The shuffle must choose the inserted scalar exactly once.
      if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
        return false;

      // The shuffle is placing the inserted scalar into element i.
      NewInsIndex = i;
    }

    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");

    // Index is updated to the potentially translated insertion lane.
    IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
    return true;
  };

  // If the shuffle is unnecessary, insert the scalar operand directly into
  // operand 1 of the shuffle. Example:
  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
  Value *Scalar;
  ConstantInt *IndexC;
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
    return InsertElementInst::Create(V1, Scalar, IndexC);

  // Try again after commuting shuffle. Example:
  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
  std::swap(V0, V1);
  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
    return InsertElementInst::Create(V1, Scalar, IndexC);

  return nullptr;
}
2760
2761staticInstruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2762// Match the operands as identity with padding (also known as concatenation
2763// with undef) shuffles of the same source type. The backend is expected to
2764// recreate these concatenations from a shuffle of narrow operands.
2765auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2766auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2767if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2768 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2769returnnullptr;
2770
2771// We limit this transform to power-of-2 types because we expect that the
2772// backend can convert the simplified IR patterns to identical nodes as the
2773// original IR.
2774// TODO: If we can verify the same behavior for arbitrary types, the
2775// power-of-2 checks can be removed.
2776Value *X = Shuffle0->getOperand(0);
2777Value *Y = Shuffle1->getOperand(0);
2778if (X->getType() !=Y->getType() ||
2779 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2780 !isPowerOf2_32(
2781 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2782 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2783match(X,m_Undef()) ||match(Y,m_Undef()))
2784returnnullptr;
2785assert(match(Shuffle0->getOperand(1),m_Undef()) &&
2786match(Shuffle1->getOperand(1),m_Undef()) &&
2787"Unexpected operand for identity shuffle");
2788
2789// This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2790// operands directly by adjusting the shuffle mask to account for the narrower
2791// types:
2792// shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2793int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2794int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2795assert(WideElts > NarrowElts &&"Unexpected types for identity with padding");
2796
2797ArrayRef<int> Mask = Shuf.getShuffleMask();
2798SmallVector<int, 16> NewMask(Mask.size(), -1);
2799for (int i = 0, e = Mask.size(); i != e; ++i) {
2800if (Mask[i] == -1)
2801continue;
2802
2803// If this shuffle is choosing an undef element from 1 of the sources, that
2804// element is undef.
2805if (Mask[i] < WideElts) {
2806if (Shuffle0->getMaskValue(Mask[i]) == -1)
2807continue;
2808 } else {
2809if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2810continue;
2811 }
2812
2813// If this shuffle is choosing from the 1st narrow op, the mask element is
2814// the same. If this shuffle is choosing from the 2nd narrow op, the mask
2815// element is offset down to adjust for the narrow vector widths.
2816if (Mask[i] < WideElts) {
2817assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2818 NewMask[i] = Mask[i];
2819 } else {
2820assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2821 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2822 }
2823 }
2824return new ShuffleVectorInst(X, Y, NewMask);
2825}
2826
2827// Splatting the first element of the result of a BinOp, where any of the
2828// BinOp's operands is the result of a first-element splat, can be simplified
2829// to splatting the first element of the BinOp's result.
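// e.g. (an illustrative IR sketch with hypothetical values %x and %y):
// %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
// %b = add <4 x i32> %s, %y
// %r = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
// %b.new = add <4 x i32> %x, %y
// %r = shufflevector <4 x i32> %b.new, <4 x i32> poison, <4 x i32> zeroinitializer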
2830Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2831if (!match(SVI.getOperand(1), m_Poison()) ||
2832 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2833 !SVI.getOperand(0)->hasOneUse())
2834return nullptr;
2835
2836Value *Op0 = SVI.getOperand(0);
2837Value *X, *Y;
2838if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
2839m_Value(Y))) &&
2840 !match(Op0, m_BinOp(m_Value(X),
2841m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
2842return nullptr;
2843if (X->getType() != Y->getType())
2844return nullptr;
2845
2846auto *BinOp = cast<BinaryOperator>(Op0);
2847if (!isSafeToSpeculativelyExecuteWithVariableReplaced(BinOp))
2848return nullptr;
2849
2850Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2851if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2852 NewBOI->copyIRFlags(BinOp);
2853
2854return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2855}
2856
2857Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2858Value *LHS = SVI.getOperand(0);
2859Value *RHS = SVI.getOperand(1);
2860SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2861if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2862 SVI.getType(), ShufQuery))
2863return replaceInstUsesWith(SVI, V);
2864
2865if (Instruction *I = simplifyBinOpSplats(SVI))
2866return I;
2867
2868// Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2869// order to support scalable vectors.
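// e.g. (illustrative):
// shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> undef, zeroinitializer
// --> shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> poison, zeroinitializer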
2870if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2871return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2872
2873if (isa<ScalableVectorType>(LHS->getType()))
2874return nullptr;
2875
2876unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2877unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2878
2879// shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2880//
2881// if X and Y are of the same (vector) type, and the element size is not
2882// changed by the bitcasts, we can distribute the bitcasts through the
2883// shuffle, hopefully reducing the number of instructions. We make sure that
2884// at least one bitcast only has one use, so we don't *increase* the number of
2885// instructions here.
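// e.g. (illustrative, both casts preserve the 32-bit element size):
// shuffle (bitcast <4 x i32> %x to <4 x float>),
//         (bitcast <4 x i32> %y to <4 x float>), <0, 4>
// --> bitcast (shuffle <4 x i32> %x, <4 x i32> %y, <0, 4>) to <2 x float>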
2886Value *X, *Y;
2887if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2888X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2889X->getType()->getScalarSizeInBits() ==
2890 SVI.getType()->getScalarSizeInBits() &&
2891 (LHS->hasOneUse() || RHS->hasOneUse())) {
2892Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2893 SVI.getName() + ".uncasted");
2894return new BitCastInst(V, SVI.getType());
2895 }
2896
2897ArrayRef<int> Mask = SVI.getShuffleMask();
2898
2899// Peek through a bitcasted shuffle operand by scaling the mask. If the
2900// simulated shuffle can simplify, then this shuffle is unnecessary:
2901// shuf (bitcast X), undef, Mask --> bitcast X'
2902// TODO: This could be extended to allow length-changing shuffles.
2903// The transform might also be obsoleted if we allowed canonicalization
2904// of bitcasted shuffles.
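// e.g. (illustrative): shuf (bitcast <2 x i64> %x to <4 x i32>), undef, <0,1,2,3>
// scales Mask to <0,1>, an identity shuffle of %x, so the whole sequence
// simplifies to: bitcast <2 x i64> %x to <4 x i32>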
2905if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2906X->getType()->isVectorTy() && VWidth == LHSWidth) {
2907// Try to create a scaled mask constant.
2908auto *XType = cast<FixedVectorType>(X->getType());
2909unsigned XNumElts = XType->getNumElements();
2910SmallVector<int, 16> ScaledMask;
2911if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2912// If the shuffled source vector simplifies, cast that value to this
2913// shuffle's type.
2914if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2915 ScaledMask, XType, ShufQuery))
2916return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2917 }
2918 }
2919
2920// shuffle x, x, mask --> shuffle x, undef, mask'
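// e.g. (illustrative): shuffle %x, %x, <0,5,2,7> --> shuffle %x, poison, <0,1,2,3>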
2921if (LHS == RHS) {
2922assert(!match(RHS, m_Undef()) &&
2923"Shuffle with 2 undef ops not simplified?");
2924return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2925 }
2926
2927// shuffle undef, x, mask --> shuffle x, undef, mask'
2928if (match(LHS, m_Undef())) {
2929 SVI.commute();
2930return &SVI;
2931 }
2932
2933if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2934return I;
2935
2936if (Instruction *I = foldSelectShuffle(SVI))
2937return I;
2938
2939if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2940return I;
2941
2942if (Instruction *I = narrowVectorSelect(SVI, Builder))
2943return I;
2944
2945if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2946return I;
2947
2948if (Instruction *I = foldCastShuffle(SVI, Builder))
2949return I;
2950
2951APInt PoisonElts(VWidth, 0);
2952APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2953if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2954if (V != &SVI)
2955return replaceInstUsesWith(SVI, V);
2956return &SVI;
2957 }
2958
2959if (Instruction *I = foldIdentityExtractShuffle(SVI))
2960return I;
2961
2962// These transforms have the potential to lose undef knowledge, so they are
2963// intentionally placed after SimplifyDemandedVectorElts().
2964if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2965return I;
2966if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2967return I;
2968
2969if (match(RHS, m_Constant())) {
2970if (auto *SI = dyn_cast<SelectInst>(LHS)) {
2971// We cannot do this fold for elementwise select since ShuffleVector is
2972// not elementwise.
2973if (SI->getCondition()->getType()->isIntegerTy() &&
2974 (isa<PoisonValue>(RHS) ||
2975isGuaranteedNotToBePoison(SI->getCondition()))) {
2976if (Instruction *I = FoldOpIntoSelect(SVI, SI))
2977return I;
2978 }
2979 }
2980if (auto *PN = dyn_cast<PHINode>(LHS)) {
2981if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
2982return I;
2983 }
2984 }
2985
2986if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
2987Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2988return replaceInstUsesWith(SVI, V);
2989 }
2990
2991// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2992// a non-vector type. We can instead bitcast the original vector followed by
2993// an extract of the desired element:
2994//
2995// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2996// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2997// %1 = bitcast <4 x i8> %sroa to i32
2998// Becomes:
2999// %bc = bitcast <16 x i8> %in to <4 x i32>
3000// %ext = extractelement <4 x i32> %bc, i32 0
3001//
3002// If the shuffle is extracting a contiguous range of values from the input
3003// vector then each use which is a bitcast of the extracted size can be
3004// replaced. This will work if the vector types are compatible, and the begin
3005// index is aligned to a value in the casted vector type. If the begin index
3006// isn't aligned then we can shuffle the original vector (keeping the same
3007// vector type) before extracting.
3008//
3009// This code will bail out if the target type is fundamentally incompatible
3010// with vectors of the source type.
3011//
3012// Example of <16 x i8>, target type i32:
3013// Index range [4,8): v-----------v Will work.
3014// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3015// <16 x i8>: | | | | | | | | | | | | | | | | |
3016// <4 x i32>: | | | | |
3017// +-----------+-----------+-----------+-----------+
3018// Index range [6,10): ^-----------^ Needs an extra shuffle.
3019// Target type i40: ^--------------^ Won't work, bail.
3020bool MadeChange = false;
3021if (isShuffleExtractingFromLHS(SVI, Mask)) {
3022Value *V = LHS;
3023unsigned MaskElems = Mask.size();
3024auto *SrcTy = cast<FixedVectorType>(V->getType());
3025unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
3026unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3027assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3028unsigned SrcNumElems = SrcTy->getNumElements();
3029SmallVector<BitCastInst *, 8> BCs;
3030DenseMap<Type *, Value *> NewBCs;
3031for (User *U : SVI.users())
3032if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
3033if (!BC->use_empty())
3034// Only visit bitcasts that weren't previously handled.
3035 BCs.push_back(BC);
3036for (BitCastInst *BC : BCs) {
3037unsigned BegIdx = Mask.front();
3038Type *TgtTy = BC->getDestTy();
3039unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3040if (!TgtElemBitWidth)
3041continue;
3042unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3043bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3044bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3045if (!VecBitWidthsEqual)
3046continue;
3047if (!VectorType::isValidElementType(TgtTy))
3048continue;
3049auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3050if (!BegIsAligned) {
3051// Shuffle the input so [0,NumElements) contains the output, and
3052// [NumElems,SrcNumElems) is undef.
3053SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3054for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3055 ShuffleMask[I] = Idx;
3056 V = Builder.CreateShuffleVector(V, ShuffleMask,
3057 SVI.getName() + ".extract");
3058 BegIdx = 0;
3059 }
3060unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3061assert(SrcElemsPerTgtElem);
3062 BegIdx /= SrcElemsPerTgtElem;
3063auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3064if (Inserted)
3065 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3066auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3067 SVI.getName() + ".extract");
3068// The shufflevector isn't being replaced: the bitcast that used it
3069// is. InstCombine will visit the newly-created instructions.
3070replaceInstUsesWith(*BC, Ext);
3071 MadeChange = true;
3072 }
3073 }
3074
3075// If the LHS is a shufflevector itself, see if we can combine it with this
3076// one without producing an unusual shuffle.
3077// Cases that might be simplified:
3078// 1.
3079// x1=shuffle(v1,v2,mask1)
3080// x=shuffle(x1,undef,mask)
3081// ==>
3082// x=shuffle(v1,undef,newMask)
3083// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3084// 2.
3085// x1=shuffle(v1,undef,mask1)
3086// x=shuffle(x1,x2,mask)
3087// where v1.size() == mask1.size()
3088// ==>
3089// x=shuffle(v1,x2,newMask)
3090// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3091// 3.
3092// x2=shuffle(v2,undef,mask2)
3093// x=shuffle(x1,x2,mask)
3094// where v2.size() == mask2.size()
3095// ==>
3096// x=shuffle(x1,v2,newMask)
3097// newMask[i] = (mask[i] < x1.size())
3098// ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3099// 4.
3100// x1=shuffle(v1,undef,mask1)
3101// x2=shuffle(v2,undef,mask2)
3102// x=shuffle(x1,x2,mask)
3103// where v1.size() == v2.size()
3104// ==>
3105// x=shuffle(v1,v2,newMask)
3106// newMask[i] = (mask[i] < x1.size())
3107// ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3108//
3109// Here we are really conservative:
3110// we are absolutely afraid of producing a shuffle mask not in the input
3111// program, because the code gen may not be smart enough to turn a merged
3112// shuffle into two specific shuffles: it may produce worse code. As such,
3113// we only merge two shuffles if the result is either a splat or one of the
3114// input shuffle masks. In this case, merging the shuffles just removes
3115// one instruction, which we know is safe. This is good for things like
3116// turning: (splat(splat)) -> splat, or
3117// merge(V[0..n], V[n+1..2n]) -> V[0..2n]
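// e.g. (an illustrative instance of case 1 where the merged mask is a splat):
// %x1 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> zeroinitializer
// %x = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// ==>
// %x = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> zeroinitializer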
3118ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3119ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3120if (LHSShuffle)
3121if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3122 !match(RHS, m_Poison()))
3123 LHSShuffle = nullptr;
3124if (RHSShuffle)
3125if (!match(RHSShuffle->getOperand(1), m_Poison()))
3126 RHSShuffle = nullptr;
3127if (!LHSShuffle && !RHSShuffle)
3128return MadeChange ? &SVI : nullptr;
3129
3130Value* LHSOp0 = nullptr;
3131Value* LHSOp1 = nullptr;
3132Value* RHSOp0 = nullptr;
3133unsigned LHSOp0Width = 0;
3134unsigned RHSOp0Width = 0;
3135if (LHSShuffle) {
3136 LHSOp0 = LHSShuffle->getOperand(0);
3137 LHSOp1 = LHSShuffle->getOperand(1);
3138 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3139 }
3140if (RHSShuffle) {
3141 RHSOp0 = RHSShuffle->getOperand(0);
3142 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3143 }
3144Value* newLHS = LHS;
3145Value* newRHS = RHS;
3146if (LHSShuffle) {
3147// case 1
3148if (match(RHS, m_Poison())) {
3149 newLHS = LHSOp0;
3150 newRHS = LHSOp1;
3151 }
3152// case 2 or 4
3153else if (LHSOp0Width == LHSWidth) {
3154 newLHS = LHSOp0;
3155 }
3156 }
3157// case 3 or 4
3158if (RHSShuffle && RHSOp0Width == LHSWidth) {
3159 newRHS = RHSOp0;
3160 }
3161// case 4
3162if (LHSOp0 == RHSOp0) {
3163 newLHS = LHSOp0;
3164 newRHS = nullptr;
3165 }
3166
3167if (newLHS == LHS && newRHS == RHS)
3168return MadeChange ? &SVI : nullptr;
3169
3170ArrayRef<int> LHSMask;
3171ArrayRef<int> RHSMask;
3172if (newLHS != LHS)
3173 LHSMask = LHSShuffle->getShuffleMask();
3174if (RHSShuffle && newRHS != RHS)
3175 RHSMask = RHSShuffle->getShuffleMask();
3176
3177unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3178SmallVector<int, 16> newMask;
3179bool isSplat = true;
3180int SplatElt = -1;
3181// Create a new mask for the new ShuffleVectorInst so that the new
3182// ShuffleVectorInst is equivalent to the original one.
3183for (unsigned i = 0; i < VWidth; ++i) {
3184int eltMask;
3185if (Mask[i] < 0) {
3186// This element is a poison value.
3187 eltMask = -1;
3188 } else if (Mask[i] < (int)LHSWidth) {
3189// This element is from left hand side vector operand.
3190//
3191// If LHS is going to be replaced (case 1, 2, or 4), calculate the
3192// new mask value for the element.
3193if (newLHS != LHS) {
3194 eltMask = LHSMask[Mask[i]];
3195// If the value selected is a poison value, explicitly specify it
3196// with a -1 mask value.
3197if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3198 eltMask = -1;
3199 } else
3200 eltMask = Mask[i];
3201 } else {
3202// This element is from right hand side vector operand
3203//
3204// If the value selected is a poison value, explicitly specify it
3205// with a -1 mask value. (case 1)
3206if (match(RHS, m_Poison()))
3207 eltMask = -1;
3208// If RHS is going to be replaced (case 3 or 4), calculate the
3209// new mask value for the element.
3210else if (newRHS != RHS) {
3211 eltMask = RHSMask[Mask[i]-LHSWidth];
3212// If the value selected is a poison value, explicitly specify it
3213// with a -1 mask value.
3214if (eltMask >= (int)RHSOp0Width) {
3215assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3216"should have been checked above");
3217 eltMask = -1;
3218 }
3219 } else
3220 eltMask = Mask[i]-LHSWidth;
3221
3222// If LHS's width is changed, shift the mask value accordingly.
3223// If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3224// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3225// If newRHS == newLHS, we want to remap any references from newRHS to
3226// newLHS so that we can properly identify splats that may occur due to
3227// obfuscation across the two vectors.
3228if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3229 eltMask += newLHSWidth;
3230 }
3231
3232// Check if this could still be a splat.
3233if (eltMask >= 0) {
3234if (SplatElt >= 0 && SplatElt != eltMask)
3235isSplat = false;
3236 SplatElt = eltMask;
3237 }
3238
3239 newMask.push_back(eltMask);
3240 }
3241
3242// If the result mask is equal to one of the original shuffle masks,
3243// or is a splat, do the replacement.
3244if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3245if (!newRHS)
3246 newRHS = PoisonValue::get(newLHS->getType());
3247return new ShuffleVectorInst(newLHS, newRHS, newMask);
3248 }
3249
3250return MadeChange ? &SVI : nullptr;
3251}
S1
static const LLT S1
Definition:AMDGPULegalizerInfo.cpp:282
PHI
Rewrite undef for PHI
Definition:AMDGPURewriteUndefForPHI.cpp:100
APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
ArrayRef.h
Casting.h
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
DenseMap.h
This file defines the DenseMap class.
DerivedTypes.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
GEP
Hexagon Common GEP
Definition:HexagonCommonGEP.cpp:170
BasicBlock.h
Constant.h
Instruction.h
Operator.h
Type.h
User.h
Value.h
InstCombineInternal.h
This file provides internal interfaces used to implement the InstCombine.
foldConstantInsEltIntoShuffle
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex --> shufflevector X,...
Definition:InstCombineVectorOps.cpp:1472
evaluateInDifferentElementOrder
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask, IRBuilderBase &Builder)
Definition:InstCombineVectorOps.cpp:1991
collectSingleShuffleElements
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< int > &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
Definition:InstCombineVectorOps.cpp:621
collectShuffleElements
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< int > &Mask, Value *PermittedRHS, InstCombinerImpl &IC, bool &Rerun)
Definition:InstCombineVectorOps.cpp:781
findDemandedEltsByAllUsers
static APInt findDemandedEltsByAllUsers(Value *V)
Find union of elements of V demanded by all its users.
Definition:InstCombineVectorOps.cpp:367
foldTruncInsEltPair
static Instruction * foldTruncInsEltPair(InsertElementInst &InsElt, bool IsBigEndian, InstCombiner::BuilderTy &Builder)
If we are inserting 2 halves of a value into adjacent elements of a vector, try to convert to a singl...
Definition:InstCombineVectorOps.cpp:1609
foldSelectShuffleWith1Binop
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf, const SimplifyQuery &SQ)
Definition:InstCombineVectorOps.cpp:2212
foldIdentityPaddedShuffles
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
Definition:InstCombineVectorOps.cpp:2761
foldIdentityExtractShuffle
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to fold an extract subvector operation.
Definition:InstCombineVectorOps.cpp:2623
foldInsEltIntoSplat
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle's mask to includ...
Definition:InstCombineVectorOps.cpp:1353
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition:InstCombineVectorOps.cpp:779
foldShuffleWithInsert
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf, InstCombinerImpl &IC)
Try to replace a shuffle with an insertelement or try to replace a shuffle operand with the operand o...
Definition:InstCombineVectorOps.cpp:2673
canonicalizeInsertSplat
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
Definition:InstCombineVectorOps.cpp:2281
foldTruncShuffle
static Instruction * foldTruncShuffle(ShuffleVectorInst &Shuf, bool IsBigEndian)
Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
Definition:InstCombineVectorOps.cpp:2452
replaceExtractElements
static bool replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombinerImpl &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from,...
Definition:InstCombineVectorOps.cpp:694
cheapToScalarize
static bool cheapToScalarize(Value *V, Value *EI)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
Definition:InstCombineVectorOps.cpp:58
buildNew
static Value * buildNew(Instruction *I, ArrayRef< Value * > NewOps, IRBuilderBase &Builder)
Rebuild a new instruction just like 'I' but with the new operands given.
Definition:InstCombineVectorOps.cpp:1915
canEvaluateShuffled
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
Definition:InstCombineVectorOps.cpp:1827
foldSelectShuffleOfSelectShuffle
static Instruction * foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf)
A select shuffle of a select shuffle with a shared operand can be reduced to a single select shuffle.
Definition:InstCombineVectorOps.cpp:2165
hoistInsEltConst
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
Definition:InstCombineVectorOps.cpp:1449
findDemandedEltsBySingleUser
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr)
Find elements of V demanded by UserInstr.
Definition:InstCombineVectorOps.cpp:323
foldShuffleOfUnaryOps
static Instruction * foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize FP negate/abs after shuffle.
Definition:InstCombineVectorOps.cpp:2527
foldCastShuffle
static Instruction * foldCastShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize casts after shuffle.
Definition:InstCombineVectorOps.cpp:2574
narrowInsElt
static Instruction * narrowInsElt(InsertElementInst &InsElt, InstCombiner::BuilderTy &Builder)
If both the base vector and the inserted element are extended from the same type, do the insert eleme...
Definition:InstCombineVectorOps.cpp:1576
isShuffleEquivalentToSelect
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
Definition:InstCombineVectorOps.cpp:1251
foldInsSequenceIntoSplat
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
Definition:InstCombineVectorOps.cpp:1279
foldInsEltIntoIdentityShuffle
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle's mas...
Definition:InstCombineVectorOps.cpp:1390
getPreferredVectorIndex
static ConstantInt * getPreferredVectorIndex(ConstantInt *IndexC)
Given a constant index for a extractelement or insertelement instruction, return it with the canonica...
Definition:InstCombineVectorOps.cpp:390
isShuffleExtractingFromLHS
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef< int > Mask)
Definition:InstCombineVectorOps.cpp:2099
getAlternateBinop
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
Definition:InstCombineVectorOps.cpp:2130
InstCombiner.h
This file provides the interface for the instcombine pass implementation.
InstrTypes.h
InstructionSimplify.h
Instructions.h
isSplat
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
Definition:LowerMatrixIntrinsics.cpp:102
I
#define I(x, y, z)
Definition:MD5.cpp:58
II
uint64_t IntrinsicInst * II
Definition:NVVMIntrRange.cpp:51
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PatternMatch.h
Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition:RISCVRedundantCopyElimination.cpp:75
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.
SmallBitVector.h
This file implements the SmallBitVector class.
SmallVector.h
This file defines the SmallVector class.
Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition:Statistic.h:166
Ptr
@ Ptr
Definition:TargetLibraryInfo.cpp:77
getOpcode
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition:VPlanSLP.cpp:191
VectorUtils.h
narrowVectorSelect
static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
If both arms of a vector select are concatenated vectors, split the select, and concatenate the resul...
Definition:X86ISelLowering.cpp:46752
RHS
Value * RHS
Definition:X86PartialReduction.cpp:74
LHS
Value * LHS
Definition:X86PartialReduction.cpp:73
IV
static const uint32_t IV[8]
Definition:blake3_impl.h:78
VectorType
Definition:ItaniumDemangle.h:1173
bool
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition:APInt.h:234
llvm::APInt::zextOrTrunc
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition:APInt.cpp:1007
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition:APInt.h:1492
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition:APInt.h:1330
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition:APInt.h:371
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition:APInt.h:1111
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition:APInt.h:239
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition:ArrayRef.h:171
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition:ArrayRef.h:168
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition:ArrayRef.h:198
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition:BasicBlock.cpp:437
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition:BasicBlock.h:220
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition:BasicBlock.h:177
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition:BasicBlock.h:240
llvm::BinaryOperator
Definition:InstrTypes.h:170
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition:InstrTypes.h:370
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition:Instructions.cpp:2639
llvm::BinaryOperator::CreateWithCopiedFlags
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition:InstrTypes.h:218
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition:Instructions.h:4894
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition:Instructions.h:1479
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition:Instructions.h:1514
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition:Instructions.cpp:2972
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition:InstrTypes.h:661
llvm::CmpInst::CreateWithCopiedFlags
static CmpInst * CreateWithCopiedFlags(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Instruction *FlagsSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate, the two operands and the instructio...
Definition:Instructions.cpp:3453
llvm::CmpInst::getOpcode
OtherOps getOpcode() const
Get the opcode casted to the right type.
Definition:InstrTypes.h:758
llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition:CmpPredicate.h:22
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition:Constants.cpp:1672
llvm::ConstantExpr::getShuffleVector
static Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
Definition:Constants.cpp:2600
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition:Constants.cpp:2692
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition:Constants.h:83
llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition:Constants.h:258
llvm::ConstantInt::getBitWidth
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition:Constants.h:151
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition:Constants.h:157
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition:Constants.h:148
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition:Constants.cpp:1421
llvm::Constant
This is an important base class in LLVM.
Definition:Constant.h:42
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition:Constants.cpp:420
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition:Constants.cpp:435
llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition:DWARFExpression.h:32
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition:DataLayout.h:63
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition:DataLayout.h:198
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition:DataLayout.h:617
llvm::DenseMapBase::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition:DenseMap.h:226
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition:DenseMap.h:211
llvm::DenseMap
Definition:DenseMap.h:727
llvm::ElementCount
Definition:TypeSize.h:300
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition:Instructions.h:1775
llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition:Instructions.h:1788
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition:Instructions.h:1799
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition:Instructions.h:1800
llvm::ExtractElementInst::getVectorOperandType
VectorType * getVectorOperandType() const
Definition:Instructions.h:1804
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition:Type.cpp:791
llvm::Function
Definition:Function.h:63
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition:Instructions.h:933
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition:Instructions.h:956
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition:Instructions.cpp:1556
llvm::IRBuilderBase::InsertPointGuard
Definition:IRBuilder.h:394
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition:IRBuilder.h:113
llvm::IRBuilderBase::CreateFCmp
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition:IRBuilder.h:2390
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition:IRBuilder.h:2511
llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition:IRBuilder.h:2499
llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition:IRBuilder.h:1480
llvm::IRBuilderBase::CreateCast
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition:IRBuilder.h:2186
llvm::IRBuilderBase::CreateGEP
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition:IRBuilder.h:1874
llvm::IRBuilderBase::getInt64
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition:IRBuilder.h:510
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition:IRBuilder.h:2152
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition:IRBuilder.h:2533
llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition:IRBuilder.h:2019
llvm::IRBuilderBase::CreateBinOp
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition:IRBuilder.h:1671
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition:IRBuilder.h:199
llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition:IRBuilder.h:2380
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition:Instructions.h:1834
llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition:Instructions.h:1848
llvm::InsertElementInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition:Instructions.h:1862
llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition:Instructions.h:2485
llvm::InstCombinerImpl
Definition:InstCombineInternal.h:61
llvm::InstCombinerImpl::FoldOpIntoSelect
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Definition:InstructionCombining.cpp:1687
llvm::InstCombinerImpl::foldOpIntoPhi
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Definition:InstructionCombining.cpp:1770
llvm::InstCombinerImpl::SimplifyDemandedVectorElts
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Definition:InstCombineSimplifyDemanded.cpp:1396
llvm::InstCombinerImpl::foldSelectShuffle
Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf)
Try to fold shuffles that are the equivalent of a vector select.
Definition:InstCombineVectorOps.cpp:2313
llvm::InstCombinerImpl::visitInsertValueInst
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8,...
Definition:InstCombineVectorOps.cpp:1214
llvm::InstCombinerImpl::visitInsertElementInst
Instruction * visitInsertElementInst(InsertElementInst &IE)
Definition:InstCombineVectorOps.cpp:1672
llvm::InstCombinerImpl::visitExtractElementInst
Instruction * visitExtractElementInst(ExtractElementInst &EI)
Definition:InstCombineVectorOps.cpp:398
llvm::InstCombinerImpl::simplifyBinOpSplats
Instruction * simplifyBinOpSplats(ShuffleVectorInst &SVI)
Definition:InstCombineVectorOps.cpp:2830
llvm::InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse
Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI)
Look for chain of insertvalue's that fully define an aggregate, and trace back the values inserted,...
Definition:InstCombineVectorOps.cpp:868
llvm::InstCombinerImpl::visitShuffleVectorInst
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
Definition:InstCombineVectorOps.cpp:2857
llvm::InstCombiner::SQ
SimplifyQuery SQ
Definition:InstCombiner.h:77
llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition:InstCombiner.h:388
llvm::InstCombiner::Worklist
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition:InstCombiner.h:65
llvm::InstCombiner::InsertNewInstWith
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition:InstCombiner.h:377
llvm::InstCombiner::DL
const DataLayout & DL
Definition:InstCombiner.h:76
llvm::InstCombiner::addToWorklist
void addToWorklist(Instruction *I)
Definition:InstCombiner.h:332
llvm::InstCombiner::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition:InstCombiner.h:412
llvm::InstCombiner::getSafeVectorConstantForBinop
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition:InstCombiner.h:280
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition:InstCombiner.h:61
llvm::InstCombiner::getSimplifyQuery
const SimplifyQuery & getSimplifyQuery() const
Definition:InstCombiner.h:338
llvm::InstructionWorklist::addValue
void addValue(Value *V)
Add value to the worklist if it is an instruction.
Definition:InstructionWorklist.h:51
llvm::Instruction
Definition:Instruction.h:68
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
Definition:Instruction.cpp:403
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
Definition:Instruction.cpp:410
llvm::Instruction::copyIRFlags
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
Definition:Instruction.cpp:660
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition:Instruction.cpp:68
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition:Instruction.cpp:704
llvm::Instruction::setFastMathFlags
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Definition:Instruction.cpp:601
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition:Instruction.h:169
llvm::Instruction::isExact
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
Definition:Instruction.cpp:557
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition:Instruction.h:310
llvm::Instruction::BinaryOps
BinaryOps
Definition:Instruction.h:1008
llvm::Instruction::isShift
bool isShift() const
Definition:Instruction.h:318
llvm::Instruction::dropPoisonGeneratingFlags
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition:Instruction.cpp:426
llvm::Instruction::isIntDivRem
bool isIntDivRem() const
Definition:Instruction.h:316
llvm::Instruction::CastOps
CastOps
Definition:Instruction.h:1022
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition:IntrinsicInst.h:48
llvm::PHINode
Definition:Instructions.h:2600
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition:Instructions.h:2735
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition:Instructions.h:2695
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition:Instructions.h:2675
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition:Instructions.h:2671
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition:Instructions.h:2635
llvm::PoisonValue
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition:Constants.h:1460
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition:Constants.cpp:1878
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition:Instructions.h:1657
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition:Instructions.h:1682
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition:Instructions.h:1901
llvm::ShuffleVectorInst::changesLength
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
Definition:Instructions.h:1980
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition:Instructions.h:1950
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition:Instructions.cpp:1925
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition:Instructions.h:1941
llvm::ShuffleVectorInst::increasesLength
bool increasesLength() const
Return true if this shuffle returns a vector with a greater number of elements than its source vector...
Definition:Instructions.h:1991
llvm::ShuffleVectorInst::isIdentityWithExtract
bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
Definition:Instructions.cpp:2136
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition:Instructions.cpp:1788
llvm::ShuffleVectorInst::isSelect
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
Definition:Instructions.h:2090
llvm::ShuffleVectorInst::isIdentityMask
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
Definition:Instructions.cpp:1883
llvm::ShuffleVectorInst::commuteShuffleMask
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
Definition:Instructions.h:2308
llvm::ShuffleVectorInst::commute
void commute()
Swap the operands and adjust the mask to preserve the semantics of the instruction.
Definition:Instructions.cpp:1700
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition:SmallBitVector.h:35
llvm::SmallBitVector::all
bool all() const
Returns true if all bits are set.
Definition:SmallBitVector.h:216
llvm::SmallDenseMap
Definition:DenseMap.h:883
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::TruncInst
This class represents a truncation of integer types.
Definition:Instructions.h:4503
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition:Type.h:270
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition:Type.h:243
llvm::Type::getStructNumElements
unsigned getStructNumElements() const
llvm::Type::getArrayNumElements
uint64_t getArrayNumElements() const
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition:Type.h:74
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition:Type.h:73
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition:Type.h:184
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition:Type.h:136
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
llvm::UnaryOperator
Definition:InstrTypes.h:100
llvm::UnaryOperator::CreateWithCopiedFlags
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition:InstrTypes.h:138
llvm::UnaryOperator::getOpcode
UnaryOps getOpcode() const
Definition:InstrTypes.h:153
llvm::UnaryOperator::CreateFNegFMF
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition:InstrTypes.h:146
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition:Constants.cpp:1859
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition:Use.h:43
llvm::User
Definition:User.h:44
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition:User.h:228
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::DoPHITranslation
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition:Value.cpp:1067
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition:Value.h:434
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition:Value.cpp:534
llvm::Value::users
iterator_range< user_iterator > users()
Definition:Value.h:421
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition:Value.cpp:1075
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition:Value.cpp:309
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition:DerivedTypes.h:665
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
llvm::VectorType::getElementType
Type * getElementType() const
Definition:DerivedTypes.h:460
llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition:TypeSize.h:171
llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition:TypeSize.h:168
llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition:ilist_node.h:32
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition:ilist_node.h:132
uint64_t
unsigned
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::Intrinsic::getOrInsertDeclaration
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition:Intrinsics.cpp:732
llvm::M68k::MemAddrModeKind::V
@ V
llvm::MipsISD::Ext
@ Ext
Definition:MipsISelLowering.h:157
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition:NVPTX.h:162
llvm::PatternMatch::m_Poison
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition:PatternMatch.h:160
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition:PatternMatch.h:100
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition:PatternMatch.h:165
llvm::PatternMatch::m_Trunc
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition:PatternMatch.h:2075
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition:PatternMatch.h:982
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition:PatternMatch.h:49
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition:PatternMatch.h:885
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition:PatternMatch.h:1837
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition:PatternMatch.h:168
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition:PatternMatch.h:1799
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition:PatternMatch.h:599
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition:PatternMatch.h:67
llvm::PatternMatch::m_Neg
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Definition:PatternMatch.h:2820
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition:PatternMatch.h:1911
llvm::PatternMatch::m_ImmConstant
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition:PatternMatch.h:864
llvm::PatternMatch::m_FPExt
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
Definition:PatternMatch.h:2176
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition:PatternMatch.h:1923
llvm::PatternMatch::m_ZExt
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition:PatternMatch.h:2107
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Match any compare instruction and ignore it.
Definition:PatternMatch.h:105
llvm::PatternMatch::m_BitCast
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition:PatternMatch.h:2021
llvm::PatternMatch::m_UnOp
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
Definition:PatternMatch.h:95
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition:PatternMatch.h:92
llvm::PatternMatch::m_LShr
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition:PatternMatch.h:1240
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition:PatternMatch.h:1156
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition:PatternMatch.h:152
llvm::PatternMatch::m_SExt
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
Definition:PatternMatch.h:2101
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition:PatternMatch.h:612
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition:PatternMatch.h:1829
llvm::PatternMatch::m_FAbs
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
Definition:PatternMatch.h:2702
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition:PatternMatch.h:239
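
Taken together, these matchers compose into declarative checks over the IR. The following is a minimal sketch, not taken from the file itself (the helper name is invented), showing match, m_ExtractElt, m_InsertElt, and m_ConstantInt cooperating to recognize an extract of an insert at the same constant index:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace PatternMatch;

// Recognize "extractelement (insertelement V, S, C1), C2" with C1 == C2 and
// forward the inserted scalar S; return nullptr if the shape does not match.
static Value *matchExtractOfInsert(Value *Ext) {
  Value *Scalar;
  ConstantInt *InsIdx = nullptr, *ExtIdx = nullptr;
  if (match(Ext, m_ExtractElt(m_InsertElt(m_Value(), m_Value(Scalar),
                                          m_ConstantInt(InsIdx)),
                              m_ConstantInt(ExtIdx))) &&
      APInt::isSameValue(InsIdx->getValue(), ExtIdx->getValue()))
    return Scalar; // extract(insert(V, S, i), i) --> S
  return nullptr;
}
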
llvm
The top-level namespace that contains all LLVM APIs.
Definition:AddressRanges.h:18
llvm::isSafeToSpeculativelyExecuteWithVariableReplaced
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I)
Variant of isSafeToSpeculativelyExecute that does not use information from the instruction's non-constant operands, for use when those operands are about to be replaced.
Definition:ValueTracking.h:847
llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition:STLExtras.h:2448
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition:VectorUtils.cpp:1053
llvm::simplifyShuffleVectorInst
Value * simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef< int > Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
Definition:InstructionSimplify.cpp:5568
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition:MathExtras.h:292
llvm::simplifyInsertValueInst
Value * simplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an InsertValueInst, fold the result or return null.
Definition:InstructionSimplify.cpp:5177
llvm::ConstantFoldBinaryOpOperands
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
Definition:ConstantFolding.cpp:1300
llvm::PoisonMaskElem
constexpr int PoisonMaskElem
Definition:Instructions.h:1889
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition:VectorUtils.cpp:226
llvm::simplifyInsertElementInst
Value * simplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
Definition:InstructionSimplify.cpp:5183
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition:STLExtras.h:1945
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition:STLExtras.h:1903
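
The STLExtras helpers listed above (enumerate, count_if, is_contained) are how code like this walks shuffle masks without manual index bookkeeping. A self-contained sketch, illustrative only:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  // -1 plays the role of PoisonMaskElem in a shuffle mask.
  llvm::SmallVector<int, 8> Mask = {0, -1, 2, -1};
  // count_if: how many lanes are poison?
  auto NumPoison = llvm::count_if(Mask, [](int M) { return M == -1; });
  // is_contained: does any lane read source element 2?
  bool ReadsElt2 = llvm::is_contained(Mask, 2);
  // enumerate: visit (lane index, mask element) pairs.
  for (auto [Lane, Elt] : llvm::enumerate(Mask))
    std::printf("lane %zu -> %d\n", Lane, Elt);
  std::printf("poison lanes: %d, reads element 2: %d\n", int(NumPoison),
              int(ReadsElt2));
  return 0;
}
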
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition:CFG.h:118
llvm::isKnownNeverNaN
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Definition:ValueTracking.h:601
llvm::isGuaranteedNotToBePoison
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Definition:ValueTracking.cpp:7856
llvm::simplifyExtractElementInst
Value * simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
Definition:InstructionSimplify.cpp:5299
llvm::scaleShuffleMaskElts
bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
Definition:VectorUtils.cpp:517
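
createUnaryMask and scaleShuffleMaskElts are the two mask-rewriting utilities this file leans on. A hedged sketch of both (the driver function is hypothetical):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

void demoMaskUtils() {
  // A binary shuffle mask over two 4-element vectors; indices 4..7 select
  // from the second operand.
  int BinaryMask[] = {0, 5, 2, 7};
  // Fold onto a single operand: {0, 5, 2, 7} -> {0, 1, 2, 3}.
  llvm::SmallVector<int, 16> Unary =
      llvm::createUnaryMask(BinaryMask, /*NumElts=*/4);
  // Re-express the mask for 8 destination lanes (e.g. after bitcasting
  // <4 x i32> to <8 x i16>); returns false when not representable.
  llvm::SmallVector<int, 16> Scaled;
  bool Ok = llvm::scaleShuffleMaskElts(/*NumDstElts=*/8, Unary, Scaled);
  (void)Ok; // On success, Scaled == {0, 1, 2, 3, 4, 5, 6, 7}.
}
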
BinopElts
These are the ingredients in an alternate form binary operator as described below.
Definition:InstCombineVectorOps.cpp:2116
BinopElts::BinopElts
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
Definition:InstCombineVectorOps.cpp:2120
BinopElts::Opcode
BinaryOperator::BinaryOps Opcode
Definition:InstCombineVectorOps.cpp:2117
BinopElts::Op1
Value * Op1
Definition:InstCombineVectorOps.cpp:2119
BinopElts::Op0
Value * Op0
Definition:InstCombineVectorOps.cpp:2118
llvm::PatternMatch::m_Mask
Definition:PatternMatch.h:1859
llvm::PatternMatch::m_ZeroMask
Definition:PatternMatch.h:1868
llvm::SimplifyQuery
Definition:SimplifyQuery.h:70
llvm::SimplifyQuery::getWithInstruction
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition:SimplifyQuery.h:107
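
Finally, the simplify* entry points above all thread a SimplifyQuery, and getWithInstruction pins that query to a context instruction. A minimal sketch of the usual call shape (the wrapper function is hypothetical):

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Try to fold an extractelement before building any replacement IR; returns
// the simplified value, or nullptr if no fold applies.
static Value *trySimplifyExtract(ExtractElementInst &EI,
                                 const SimplifyQuery &SQ) {
  // Anchor the query at EI so context-sensitive facts (dominating
  // conditions, assumes) are applied at its position.
  return simplifyExtractElementInst(EI.getVectorOperand(),
                                    EI.getIndexOperand(),
                                    SQ.getWithInstruction(&EI));
}
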
