//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;
50"Number of aggregate reconstructions turned into reuse of the " 53/// Return true if the value is cheaper to scalarize than it is to leave as a 54/// vector operation. If the extract index \p EI is a constant integer then 55/// some operations may be cheap to scalarize. 57/// FIXME: It's possible to create more instructions than previously existed. 61// If we can pick a scalar constant value out of a vector, that is free. 62if (
auto *
C = dyn_cast<Constant>(V))
63return CEI ||
C->getSplatValue();
65if (CEI &&
match(V, m_Intrinsic<Intrinsic::stepvector>())) {
66ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // Index needs to be lower than the minimum size of the vector, because
    // for scalable vector, the vector size is known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  // ...

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<ExtractElementInst *, 8> Extracts;
  Instruction *PHIUser = nullptr;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;
  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      // ...
    } else {
      // Scalarize PHI input:
      Instruction *newEI =
          ExtractElementInst::Create(PHIInVal, EI.getIndexOperand(), "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      if (pos && !isa<PHINode>(pos)) {
        // ...
      }
      // ...
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();
  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if ((!ShiftAmountC ||
         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
        Ext.getVectorOperand()->hasOneUse()) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }
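  // Worked example (illustrative): with the little-endian fold above, a
  // non-zero index becomes a right-shift before the truncate:
  //   extelt (bitcast i32 %x to <4 x i8>), 2 --> trunc (lshr i32 %x, 16) to i8
  // because ShiftAmountC = ExtIndexC * DestWidth = 2 * 8 = 16.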
  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar, *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be removed to SimplifyDemandedVectorElts,
      // once scale vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |            |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;
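    // Worked example (illustrative): with NarrowingRatio = 2 and ExtIndexC = 3
    // (the diagram above), Chunk = 3 % 2 = 1 on little-endian; on big-endian
    // the chunk order is reversed, so Chunk = 2 - 1 - 1 = 0.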
    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}
/// Find elements of V demanded by UserInstr.
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}
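// Illustrative example: if %v is only used as operand 0 of
//   shufflevector <4 x i32> %v, <4 x i32> %w, <2 x i32> <i32 1, i32 6>
// then only element 1 of %v is demanded (mask value 6 selects element 2 of
// %w), so the returned UsedElts has just bit 1 set.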
/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for a extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (auto *SI = dyn_cast<SelectInst>(SrcVec))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isGuaranteedNotToBePoison(SI->getCondition()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;
  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // Index needs to be lower than the minimum size of the vector, because
      // for scalable vector, the vector size is known at run time.
      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For fixed-length vector, it's invalid to extract out-of-range element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;
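    // Illustrative example of the stepvector fold above:
    //   %s = call <4 x i32> @llvm.stepvector.v4i32()
    //   extractelement <4 x i32> %s, i64 2 --> i32 2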
    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO come up with a n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  if (auto *CI = dyn_cast<CmpInst>(SrcVec))
    if (cheapToScalarize(SrcVec, Index)) {
      // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
      Value *E0 = Builder.CreateExtractElement(CI->getOperand(0), Index);
      Value *E1 = Builder.CreateExtractElement(CI->getOperand(1), Index);
      CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
      return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(),
                                            SrcCmpInst->getPredicate(), E0, E1,
                                            SrcCmpInst);
    }
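  // Illustrative example of the binop scalarization above:
  //   extractelement (add <4 x i32> %x, %y), i64 0
  //   --> add (extractelement <4 x i32> %x, i64 0),
  //           (extractelement <4 x i32> %y, i64 0)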
  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // instsimplify already handled the case where the indices are constants
      // and equal by value, if both are constants, they must not be the same
      // value, extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelements.
        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          return GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vector.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }
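  // Illustrative example of the cast canonicalization above:
  //   extractelement (sext <4 x i16> %x to <4 x i32>), i64 1
  //   --> sext (extractelement <4 x i16> %x, i64 1) to i32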
  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there's two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable type, the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}
/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI =
                   dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}
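// Illustrative example: with <4 x i32> operands, inserting
// (extractelement RHS, 1) into element 2 of LHS produces the combined mask
// <0, 1, 5, 3>; RHS elements are numbered after the 4 LHS elements, so
// element 1 of RHS is mask value 4 + 1 = 5.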
/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;
  // Create a shuffle mask to widen the extended-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst->getIterator());
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
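  // Illustrative example: widening a <2 x i32> source to match a <4 x i32>
  // destination uses the mask <0, 1, -1, -1>, i.e. the original two elements
  // followed by two poison elements.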
  // Replace extracts from the original narrow vector with extracts from the new
  // wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with RHS
            // further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }
          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}
/// Look for chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted, see if they are all were extractvalue'd from
/// the same source aggregate from the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;
  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return all_of(AggElts, [](std::optional<Instruction *> Elt) {
      return Elt != NotFound;
    });
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;
  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed full initializer or can't visit any more `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.
  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was didn't
    /// match the aggregate type into which the insertion was,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
  bool EltDefinedInUseBB = false;
  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB) {
      if (auto *PHI = dyn_cast<PHINode>(Elt))
        Elt = dyn_cast<Instruction>(PHI->getIncomingValueForBlock(*PredBB));
      else
        EltDefinedInUseBB = true;
    }
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };
  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find appropriate source aggregate for each of the elements,
  // and see it's the same aggregate for each element. If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      }
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };
  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }
  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for aggregate, where we'll insert the final PHI that will be
  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
  // We should look in which blocks each of the AggElts is being defined,
  // they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate,
  // from which all the elements were originally extracted from?
  // Note that we want for the map to have stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) == AggregateDescription::Found) {
      FoundSrcAgg = true;
      IV.first->second = *SourceAggregate;
    } else {
      // If UseBB is the single successor of Pred, we can add InsertValue to
      // Pred.
      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!BI || !BI->isUnconditional())
        return nullptr;
    }
  }
  if (!FoundSrcAgg)
    return nullptr;

  // Do some sanity check if we need to add insertvalue into predecessors.
  auto OrigBB = OrigIVI.getParent();
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    // Element is defined in UseBB, so it can't be used in predecessors.
    if (EltDefinedInUseBB)
      return nullptr;

    // Do this transformation cross loop boundary may cause dead loop. So we
    // should avoid this situation. But LoopInfo is not generally available, we
    // must be conservative here.
    // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
    // can't be in inner loop.
    if (UseBB != OrigBB)
      return nullptr;

    // Avoid constructing constant aggregate because constant value may expose
    // more optimizations.
    bool ConstAgg = true;
    for (auto Val : AggElts) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
      if (!isa<Constant>(Elt)) {
        ConstAgg = false;
        break;
      }
    }
    if (ConstAgg)
      return nullptr;
  }

  // For predecessors without appropriate source aggregate, create one in the
  // predecessor.
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    BasicBlock *Pred = It.first;
    Builder.SetInsertPoint(Pred->getTerminator());
    Value *V = PoisonValue::get(AggTy);
    for (auto [Idx, Val] : enumerate(AggElts)) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
      V = Builder.CreateInsertValue(V, Elt, Idx);
    }

    It.second = V;
  }
  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}
/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices, as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getAggregateOperand());

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Can not analyze scalable type, the number of elements is not a compile-time
  // constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}
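// Illustrative example: for <4 x i32> operands, the mask <0, 5, 2, 7> is
// select-equivalent (lane i takes either LHS element i or RHS element i + 4),
// but <1, 5, 2, 7> is not, because lane 0 crosses into LHS element 1.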
/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Can not handle scalable type, the number of elements is not a compile-time
  // constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }
  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}
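// Illustrative example of the fold above, for a <4 x i32> splat of %k:
//   insertelt(insertelt(insertelt(insertelt poison, %k, 0), %k, 1), %k, 2), %k, 3
//   --> shufflevector (insertelt poison, %k, 0), poison, zeroinitializer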
/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if shuffle is scalable type. The number of elements in
  // shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}
/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if shuffle is scalable type. The number of elements in
  // shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}
/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
  // Bail out if the parent has more than one use. In that case, we'd be
  // replacing the insertelt with a shuffle, and that's not a clear win.
  if (!Inst || !Inst->hasOneUse())
    return nullptr;
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
    // The shuffle must have a constant vector operand. The insertelt must have
    // a constant scalar being inserted at a constant position in the vector.
    Constant *ShufConstVec, *InsEltScalar;
    uint64_t InsEltIndex;
    if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
        !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
        !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
      return nullptr;

    // Adding an element to an arbitrary shuffle could be expensive, but a
    // shuffle that selects elements from vectors without crossing lanes is
    // assumed cheap.
    // If we're just adding a constant into that shuffle, it will still be
    // cheap.
    if (!isShuffleEquivalentToSelect(*Shuf))
      return nullptr;

    // From the above 'select' check, we know that the mask has the same number
    // of elements as the vector input operands. We also know that each constant
    // input element is used in its lane and can not be used more than once by
    // the shuffle. Therefore, replace the constant in the shuffle's constant
    // vector with the insertelt constant. Replace the constant in the shuffle's
    // mask vector with the insertelt index plus the length of the vector
    // (because the constant vector operand of a shuffle is always the 2nd
    // operand).
    ArrayRef<int> Mask = Shuf->getShuffleMask();
    unsigned NumElts = Mask.size();
    SmallVector<Constant *, 16> NewShufElts(NumElts);
    SmallVector<int, 16> NewMaskElts(NumElts);
    for (unsigned I = 0; I != NumElts; ++I) {
      if (I == InsEltIndex) {
        NewShufElts[I] = InsEltScalar;
        NewMaskElts[I] = InsEltIndex + NumElts;
      } else {
        // Copy over the existing values.
        NewShufElts[I] = ShufConstVec->getAggregateElement(I);
        NewMaskElts[I] = Mask[I];
      }

      // Bail if we failed to find an element.
      if (!NewShufElts[I])
        return nullptr;
    }

    // Create new operands for a shuffle that includes the constant of the
    // original insertelt. The old shuffle will be dead now.
    return new ShuffleVectorInst(Shuf->getOperand(0),
                                 ConstantVector::get(NewShufElts), NewMaskElts);
  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
    // Transform sequences of insertelements ops with constant data/indexes into
    // a single shuffle op.
    // Can not handle scalable type, the number of elements needed to create
    // shuffle mask is not a compile-time constant.
    if (isa<ScalableVectorType>(InsElt.getType()))
      return nullptr;
    unsigned NumElts =
        cast<FixedVectorType>(InsElt.getType())->getNumElements();

    uint64_t InsertIdx[2];
    Constant *Val[2];
    if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
        !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
        !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
        !match(IEI->getOperand(1), m_Constant(Val[1])))
      return nullptr;

    SmallVector<Constant *, 16> Values(NumElts);
    SmallVector<int, 16> Mask(NumElts);
    auto ValI = std::begin(Val);
    // Generate new constant vector and mask.
    // We have 2 values/masks from the insertelements instructions. Insert them
    // into new value/mask vectors.
    for (uint64_t I : InsertIdx) {
      if (!Values[I]) {
        Values[I] = *ValI;
        Mask[I] = NumElts + I;
      }
      ++ValI;
    }
    // Remaining values are filled with 'poison' values.
    for (unsigned I = 0; I < NumElts; ++I) {
      if (!Values[I]) {
        Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
        Mask[I] = I;
      }
    }
    // Create new operands for a shuffle that includes the constant of the
    // original insertelt.
    return new ShuffleVectorInst(IEI->getOperand(0),
                                 ConstantVector::get(Values), Mask);
  }
  return nullptr;
}

/// If both the base vector and the inserted element are extended from the same
/// type, do the insert element in the narrow source type followed by extend.
/// TODO: This can be extended to include other cast opcodes, but particularly
///       if we create a wider insertelement, make sure codegen is not harmed.
static Instruction *narrowInsElt(InsertElementInst &InsElt,
                                 InstCombiner::BuilderTy &Builder) {
  // We are creating a vector extend. If the original vector extend has another
  // use, that would mean we end up with 2 vector extends, so avoid that.
  // TODO: We could ease the use-clause to "if at least one op has one use"
  //       (assuming that the source types match - see next TODO comment).
  Value *Vec = InsElt.getOperand(0);
  if (!Vec->hasOneUse())
    return nullptr;

  Value *Scalar = InsElt.getOperand(1);
  Value *X, *Y;
  CastInst::CastOps CastOpcode;
  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
    CastOpcode = Instruction::FPExt;
  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
    CastOpcode = Instruction::SExt;
  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
    CastOpcode = Instruction::ZExt;
  else
    return nullptr;

  // TODO: We can allow mismatched types by creating an intermediate cast.
  if (X->getType()->getScalarType() != Y->getType())
    return nullptr;
  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
}

/// If we are inserting 2 halves of a value into adjacent elements of a vector,
/// try to convert to a single insert with appropriate bitcasts.
static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
                                        bool IsBigEndian,
                                        InstCombiner::BuilderTy &Builder) {
  // Pattern depends on endian because we expect lower index is inserted first.
  // Big endian:
  // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
  // Little endian:
  // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
  // Note: It is not safe to do this transform with an arbitrary base vector
  //       because the bitcast of that vector to fewer/larger elements could
  //       allow poison to spill into an element that was not poison before.
  // TODO: Detect smaller fractions of the scalar.
  // TODO: One-use checks are conservative.
  auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
  Value *Scalar0, *BaseVec;
  uint64_t Index0, Index1;
  if (!VTy || (VTy->getNumElements() & 1) ||
      !match(InsElt.getOperand(2), m_ConstantInt(Index1)) ||
      !match(InsElt.getOperand(0),
             m_OneUse(m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
                                  m_ConstantInt(Index0)))))
    return nullptr;

  // The first insert must be to the index one less than this one, and
  // the first insert must be to an even index.
  if (Index0 + 1 != Index1 || Index0 & 1)
    return nullptr;

  // For big endian, the high half of the value should be inserted first.
  // For little endian, the low half of the value should be inserted first.
  Value *X;
  uint64_t ShAmt;
  if (IsBigEndian) {
    if (!match(Scalar0, m_Trunc(m_LShr(m_Value(X), m_ConstantInt(ShAmt)))) ||
        !match(InsElt.getOperand(1), m_Trunc(m_Specific(X))))
      return nullptr;
  } else {
    if (!match(Scalar0, m_Trunc(m_Value(X))) ||
        !match(InsElt.getOperand(1),
               m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
      return nullptr;
  }

  Type *SrcTy = X->getType();
  unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
  unsigned VecEltWidth = VTy->getScalarSizeInBits();
  if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
    return nullptr;

  // Bitcast the base vector to a vector type with the source element type.
  Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
  Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);

  // Scale the insert index for a vector with half as many elements.
  // bitcast (inselt (bitcast BaseVec), X, NewIndex)
  uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
  Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
  return new BitCastInst(NewInsert, VTy);
}
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
  Value *VecOp = IE.getOperand(0);
  Value *ScalarOp = IE.getOperand(1);
  Value *IdxOp = IE.getOperand(2);

  if (auto *V = simplifyInsertElementInst(VecOp, ScalarOp, IdxOp,
                                          SQ.getWithInstruction(&IE)))
    return replaceInstUsesWith(IE, V);

  // Canonicalize type of constant indices to i64 to simplify CSE
  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(IE, 2, NewIdx);

    Value *BaseVec, *OtherScalar;
    uint64_t OtherIndexVal;
    if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
                                          m_Value(OtherScalar),
                                          m_ConstantInt(OtherIndexVal)))) &&
        !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
      Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
      return InsertElementInst::Create(NewIns, OtherScalar,
                                       Builder.getInt64(OtherIndexVal));
    }
  }
  // If the scalar is bitcast and inserted into undef, do the insert in the
  // source type followed by bitcast.
  // TODO: Generalize for insert into any constant, not just undef?
  Value *ScalarSrc;
  if (match(VecOp, m_Undef()) &&
      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
      (ScalarSrc->getType()->isIntegerTy() ||
       ScalarSrc->getType()->isFloatingPointTy())) {
    // inselt undef, (bitcast ScalarSrc), IdxOp -->
    //   bitcast (inselt undef, ScalarSrc, IdxOp)
    Type *ScalarTy = ScalarSrc->getType();
    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
    Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
                                                 : UndefValue::get(VecTy);
    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
    return new BitCastInst(NewInsElt, IE.getType());
  }

  // If the vector and scalar are both bitcast from the same element type, do
  // the insert in that source type followed by bitcast.
  Value *VecSrc;
  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
      match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
      (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
      VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
      cast<VectorType>(VecSrc->getType())->getElementType() ==
          ScalarSrc->getType()) {
    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
    return new BitCastInst(NewInsElt, IE.getType());
  }

  // If the inserted element was extracted from some other fixed-length vector
  // and both indexes are valid constants, try to turn this into a shuffle.
  // Can not handle scalable vector type, the number of elements needed to
  // create shuffle mask is not a compile-time constant.
  uint64_t InsertedIdx, ExtractedIdx;
  Value *ExtVecOp;
  if (isa<FixedVectorType>(IE.getType()) &&
      match(IdxOp, m_ConstantInt(InsertedIdx)) &&
      match(ScalarOp,
            m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
      isa<FixedVectorType>(ExtVecOp->getType()) &&
      ExtractedIdx <
          cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
    // TODO: Looking at the user(s) to determine if this insert is a
    // fold-to-shuffle opportunity does not match the usual instcombine
    // constraints. We should decide if the transform is worthy based only
    // on this instruction and its operands, but that may not work currently.
    //
    // Here, we are trying to avoid creating shuffles before reaching
    // the end of a chain of extract-insert pairs. This is complicated because
    // we do not generally form arbitrary shuffle masks in instcombine
    // (because those may codegen poorly), but collectShuffleElements() does
    // exactly that.
    //
    // The rules for determining what is an acceptable target-independent
    // shuffle mask are fuzzy because they evolve based on the backend's
    // capabilities and real-world impact.
    auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
      if (!Insert.hasOneUse())
        return true;
      auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
      if (!InsertUser)
        return true;
      return false;
    };

    // Try to form a shuffle from a chain of extract-insert ops.
    if (isShuffleRootCandidate(IE)) {
      bool Rerun = true;
      while (Rerun) {
        Rerun = false;

        SmallVector<int, 16> Mask;
        ShuffleOps LR =
            collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);

        // The proposed shuffle may be trivial, in which case we shouldn't
        // perform the combine.
        if (LR.first != &IE && LR.second != &IE) {
          // We now have a shuffle of LHS, RHS, Mask.
          if (LR.second == nullptr)
            LR.second = PoisonValue::get(LR.first->getType());
          return new ShuffleVectorInst(LR.first, LR.second, Mask);
        }
      }
    }
  }

  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
    unsigned VWidth = VecTy->getNumElements();
    APInt PoisonElts(VWidth, 0);
    APInt DemandedElts(APInt::getAllOnes(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(&IE, DemandedElts, PoisonElts)) {
      if (V != &IE)
        return replaceInstUsesWith(IE, V);
      return &IE;
    }
  }

  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
    return Shuf;

  // ...

  return nullptr;
}

/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
                                unsigned Depth = 5) {
  // We can always reorder the elements of a constant.
  if (isa<Constant>(V))
    return true;

  // We won't reorder vector arguments. No IPO here.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Two users may expect different orders of the elements. Don't try it.
  if (!I->hasOneUse())
    return false;

  if (Depth == 0)
    return false;

  switch (I->getOpcode()) {
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    // Propagating an undefined shuffle mask element to integer div/rem is not
    // allowed because those opcodes can create immediate undefined behavior
    // from an undefined element in an operand.
    if (llvm::is_contained(Mask, -1))
      return false;
    [[fallthrough]];
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::GetElementPtr: {
    // Bail out if we would create longer vector ops. We could allow creating
    // longer vector ops, but that may result in more expensive codegen.
    Type *ITy = I->getType();
    if (ITy->isVectorTy() &&
        Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
      return false;
    for (Value *Operand : I->operands()) {
      if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
        return false;
    }
    return true;
  }
  case Instruction::InsertElement: {
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!CI)
      return false;
    int ElementNumber = CI->getLimitedValue();

    // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
    // can't put an element into multiple indices.
    bool SeenOnce = false;
    for (int I : Mask) {
      if (I == ElementNumber) {
        if (SeenOnce)
          return false;
        SeenOnce = true;
      }
    }
    return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
  }
  default:
    return false;
  }
}
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of type mismatch, the type of the operands is correct.
static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
                       IRBuilderBase &Builder) {
  Builder.SetInsertPoint(I);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    assert(NewOps.size() == 2 && "binary operator with #ops != 2");
    Value *New = Builder.CreateBinOp(BO->getOpcode(),
                                     NewOps[0], NewOps[1]);
    if (auto *NewI = dyn_cast<Instruction>(New)) {
      if (isa<OverflowingBinaryOperator>(BO)) {
        NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
        NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
      }
      if (isa<PossiblyExactOperator>(BO)) {
        NewI->setIsExact(BO->isExact());
      }
      if (isa<FPMathOperator>(BO))
        NewI->copyFastMathFlags(I);
    }
    return New;
  }
  case Instruction::ICmp:
    assert(NewOps.size() == 2 && "icmp with #ops != 2");
    return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
                              NewOps[1]);
  case Instruction::FCmp:
    assert(NewOps.size() == 2 && "fcmp with #ops != 2");
    return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
                              NewOps[1]);
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt: {
    // It's possible that the mask has a different number of elements from
    // the original cast. We recompute the destination type to match the mask.
    Type *DestTy = VectorType::get(
        I->getType()->getScalarType(),
        cast<VectorType>(NewOps[0]->getType())->getElementCount());
    assert(NewOps.size() == 1 && "cast with #ops != 1");
    return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
                              DestTy);
  }
  case Instruction::GetElementPtr: {
    Value *Ptr = NewOps[0];
    ArrayRef<Value*> Idx = NewOps.slice(1);
    return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
                             Ptr, Idx, "",
                             cast<GEPOperator>(I)->isInBounds());
  }
  }
  llvm_unreachable("failed to rebuild vector instructions");
}
static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
                                              IRBuilderBase &Builder) {
  // Mask.size() does not need to be equal to the number of vector elements.

  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
  Type *EltTy = V->getType()->getScalarType();

  if (isa<PoisonValue>(V))
    return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));

  if (match(V, m_Undef()))
    return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));

  if (isa<ConstantAggregateZero>(V))
    return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));

  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
                                          Mask);

  Instruction *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::Select:
  case Instruction::GetElementPtr: {
    SmallVector<Value*, 8> NewOps;
    bool NeedsRebuild =
        (Mask.size() !=
         cast<FixedVectorType>(I->getType())->getNumElements());
    for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
      Value *V;
      // Recursively call evaluateInDifferentElementOrder on vector arguments
      // as well. E.g. GetElementPtr may have scalar operands even if the
      // return value is a vector, so we need to examine the operand type.
      if (I->getOperand(i)->getType()->isVectorTy())
        V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
      else
        V = I->getOperand(i);
      NewOps.push_back(V);
      NeedsRebuild |= (V != I->getOperand(i));
    }

    if (NeedsRebuild)
      return buildNew(I, NewOps, Builder);
    return I;
  }
  case Instruction::InsertElement: {
    int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();

    // The insertelement was inserting at Element. Figure out which element
    // that becomes after shuffling. The answer is guaranteed to be unique
    // by CanEvaluateShuffled.
    bool Found = false;
    int Index = 0;
    for (int e = Mask.size(); Index != e; ++Index) {
      if (Mask[Index] == Element) {
        Found = true;
        break;
      }
    }
    // If element is not in Mask, no need to handle the operand 1 (element to
    // be inserted). Just evaluate values in operand 0 according to Mask.
    if (!Found)
      return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);

    Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
                                               Builder);
    return Builder.CreateInsertElement(V, I->getOperand(1), Index);
  }
  }
  llvm_unreachable("failed to reorder elements of vector instruction!");
}

// Returns true if the shuffle is extracting a contiguous range of values from
// LHS, for example:
//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
//   Shuffles to:  |EE|FF|GG|HH|
//                 +--+--+--+--+
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
                                       ArrayRef<int> Mask) {
  unsigned LHSElems =
      cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
  unsigned MaskElems = Mask.size();
  unsigned BegIdx = Mask.front();
  unsigned EndIdx = Mask.back();
  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
    return false;
  for (unsigned I = 0; I != MaskElems; ++I)
    if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
      return false;
  return true;
}
/// These are the ingredients in an alternate form binary operator as described
/// below.
struct BinopElts {
  BinaryOperator::BinaryOps Opcode;
  Value *Op0;
  Value *Op1;
  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
            Value *V0 = nullptr, Value *V1 = nullptr) :
      Opcode(Opc), Op0(V0), Op1(V1) {}
  operator bool() const { return Opcode != 0; }
};
/// Binops may be transformed into binops with different opcodes and operands.
/// Reverse the usual canonicalization to enable folds with the non-canonical
/// form of the binop. If a transform is possible, return the elements of the
/// new binop. If not, return invalid elements.
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
  Type *Ty = BO->getType();
  switch (BO->getOpcode()) {
  case Instruction::Shl: {
    // shl X, C --> mul X, (1 << C)
    Constant *C;
    if (match(BO1, m_ImmConstant(C))) {
      Constant *ShlOne = ConstantFoldBinaryOpOperands(
          Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
      assert(ShlOne && "Constant folding of immediate constants failed");
      return {Instruction::Mul, BO0, ShlOne};
    }
    break;
  }
  case Instruction::Or: {
    // or disjoint X, C --> add X, C
    if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
      return {Instruction::Add, BO0, BO1};
    break;
  }
  case Instruction::Sub:
    // sub 0, X --> mul X, -1
    if (match(BO0, m_ZeroInt()))
      return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
    break;
  default:
    break;
  }
  return {};
}

/// A select shuffle of a select shuffle with a shared operand can be reduced
/// to a single select shuffle. This is an obvious improvement in IR, and the
/// backend is expected to lower select shuffles efficiently.
static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  SmallVector<int, 16> Mask;
  Shuf.getShuffleMask(Mask);
  unsigned NumElts = Mask.size();
  // Canonicalize a select shuffle with common operand as Op1.
  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
  if (ShufOp && ShufOp->isSelect() &&
      (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
    std::swap(Op0, Op1);
    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
  }

  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
  if (!ShufOp || !ShufOp->isSelect() ||
      (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
    return nullptr;

  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
  SmallVector<int, 16> Mask1;
  ShufOp->getShuffleMask(Mask1);
  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");

  // Canonicalize common operand (Op0) as X (first operand of first shuffle).
  if (Y == Op0) {
    std::swap(X, Y);
    ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
  }

  // If the mask chooses from X (operand 0), it stays the same.
  // If the mask chooses from the earlier shuffle, the other mask value is
  // transferred to the combined select shuffle:
  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
  SmallVector<int, 16> NewMask(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];

  // A select mask with undef elements might look like an identity mask.
  assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
          ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
         "Unexpected shuffle mask");
  return new ShuffleVectorInst(X, Y, NewMask);
}
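// Illustrative example: shuf X, (shuf X, Y, <0,5,2,7>), <0,5,2,7> reduces to
// the single select shuffle shuf X, Y, <0,5,2,7>, since each RHS lane of the
// outer mask forwards the corresponding lane of the inner select shuffle.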
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
                                                const SimplifyQuery &SQ) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  // Are we shuffling together some value and that same value after it has been
  // modified by a binop with a constant?
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  Constant *C;
  bool Op0IsBinop;
  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
    Op0IsBinop = true;
  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
    Op0IsBinop = false;
  else
    return nullptr;

  // The identity constant for a binop leaves a variable operand unchanged. For
  // a vector, this is a splat of something like 0, -1, or 1.
  // If there's no identity constant for this binop, we're done.
  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
  if (!IdC)
    return nullptr;

  Value *X = Op0IsBinop ? Op1 : Op0;

  // Prevent folding in the case the non-binop operand might have NaN values.
  // If X can have NaN elements then we have that the floating point math
  // operation in the transformed code may not preserve the exact NaN
  // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
  // This makes the transformation incorrect since the original program would
  // have preserved the exact NaN bit-pattern.
  // Avoid the folding if X can have NaN elements.
  if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
      !isKnownNeverNaN(X, SQ))
    return nullptr;

  // Shuffle identity constants into the lanes that return the original value.
  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
  // The existing binop constant vector remains in the same operand position.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
                                ConstantExpr::getShuffleVector(IdC, C, Mask);

  bool MightCreatePoisonOrUB =
      is_contained(Mask, PoisonMaskElem) &&
      (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
  if (MightCreatePoisonOrUB)
    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2264// shuf (bop X, C), X, M --> bop X, C' 2265// shuf X, (bop X, C), M --> bop X, C' 2269// An undef shuffle mask element may propagate as an undef constant element in 2270// the new binop. That would produce poison where the original code might not. 2271// If we already made a safe constant, then there's no danger. 2277/// If we have an insert of a scalar to a non-zero element of an undefined 2278/// vector and then shuffle that value, that's the same as inserting to the zero 2279/// element and shuffling. Splatting from the zero element is recognized as the 2280/// canonical form of splat. 2288// Match a shuffle that is a splat to a non-zero element. 2294// Insert into element 0 of a poison vector. 2298// Splat from element 0. Any mask element that is poison remains poison. 2300// shuf (inselt poison, X, 2), _, <2,2,undef> 2301// --> shuf (inselt poison, X, 0), poison, <0,0,undef> 2302unsigned NumMaskElts =
2303 cast<FixedVectorType>(Shuf.
getType())->getNumElements();
2305for (
unsigned i = 0; i != NumMaskElts; ++i)
2307 NewMask[i] = Mask[i];
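// Worked example of the canonicalization above (illustrative values only):
//   %ins = insertelement <4 x float> poison, float %f, i64 2
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
// becomes the canonical zero-element splat:
//   %ins0 = insertelement <4 x float> poison, float %f, i64 0
//   %splat = shufflevector <4 x float> %ins0, <4 x float> poison, <4 x i32> zeroinitializer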
/// Try to fold shuffles that are the equivalent of a vector select.
Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
  if (!Shuf.isSelect())
    return nullptr;

  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
  // Commuting undef to operand 0 conflicts with another canonicalization.
  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
  if (!match(Shuf.getOperand(1), m_Undef()) &&
      Shuf.getMaskValue(0) >= (int)NumElts) {
    // TODO: Can we assert that both operands of a shuffle-select are not undef
    // (otherwise, it would have been folded by instsimplify)?
    Shuf.commute();
    return &Shuf;
  }

  if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
    return I;

  if (Instruction *I = foldSelectShuffleWith1Binop(
          Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
    return I;

  BinaryOperator *B0, *B1;
  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
      !match(Shuf.getOperand(1), m_BinOp(B1)))
    return nullptr;

  // If one operand is "0 - X", allow that to be viewed as "X * -1"
  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
  // with a multiply, we will exit because C0/C1 will not be set.
  Value *X, *Y;
  Constant *C0 = nullptr, *C1 = nullptr;
  bool ConstantsAreOp1;
  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
      match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
    ConstantsAreOp1 = false;
  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
                                 m_Neg(m_Value(X)))) &&
           match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
                                 m_Neg(m_Value(Y)))))
    ConstantsAreOp1 = true;
  else
    return nullptr;

  // We need matching binops to fold the lanes together.
  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
  bool DropNSW = false;
  if (ConstantsAreOp1 && Opc0 != Opc1) {
    // TODO: We drop "nsw" if shift is converted into multiply because it may
    // not be correct when the shift amount is BitWidth - 1. We could examine
    // each vector element to determine if it is safe to keep that flag.
    if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
      DropNSW = true;
    if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
      assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
      Opc0 = AltB0.Opcode;
      C0 = cast<Constant>(AltB0.Op1);
    } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
      assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
      Opc1 = AltB1.Opcode;
      C1 = cast<Constant>(AltB1.Op1);
    }
  }

  if (Opc0 != Opc1 || !C0 || !C1)
    return nullptr;

  // The opcodes must be the same. Use a new name to make that clear.
  BinaryOperator::BinaryOps BOpc = Opc0;

  // Select the constant elements needed for the single binop.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);

  // We are moving a binop after a shuffle. When a shuffle has an undefined
  // mask element, the result is undefined, but it is not poison or undefined
  // behavior. That is not necessarily true for div/rem/shift.
  bool MightCreatePoisonOrUB =
      is_contained(Mask, PoisonMaskElem) &&
      (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
  if (MightCreatePoisonOrUB)
    NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
                                                       ConstantsAreOp1);

  Value *V;
  if (X == Y) {
    // Remove a binop and the shuffle by rearranging the constant:
    // shuffle (op V, C0), (op V, C1), M --> op V, C'
    // shuffle (op C0, V), (op C1, V), M --> op C', V
    V = X;
  } else {
    // If there are 2 different variable operands, we must create a new shuffle
    // (select) first, so check uses to ensure that we don't end up with more
    // instructions than we started with.
    if (!B0->hasOneUse() && !B1->hasOneUse())
      return nullptr;

    // If we use the original shuffle mask and op1 is *variable*, we would be
    // putting an undef into operand 1 of div/rem/shift. This is either UB or
    // poison. We do not have to guard against UB when *constants* are op1
    // because safe constants guarantee that we do not overflow sdiv/srem (and
    // there's no danger for other opcodes).
    // TODO: To allow this case, create a new shuffle mask with no undefs.
    if (MightCreatePoisonOrUB && !ConstantsAreOp1)
      return nullptr;

    // Note: In general, we do not create new shuffles in InstCombine because
    // we do not know if a target can lower an arbitrary shuffle optimally. In
    // this case, the shuffle uses the existing mask, so there is no additional
    // risk.

    // Select the variable vectors first, then perform the binop:
    // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
    // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
    V = Builder.CreateShuffleVector(X, Y, Mask);
  }

  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
                                   Builder.CreateBinOp(BOpc, NewC, V);

  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
  // 1. If we changed an opcode, poison conditions might have changed.
  // 2. If the shuffle had undef mask elements, the new binop might have undefs
  //    where the original code did not. But if we already made a safe constant,
  //    then there's no danger.
  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
    NewI->copyIRFlags(B0);
    NewI->andIRFlags(B1);
    if (DropNSW)
      NewI->setHasNoSignedWrap(false);
    if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
      NewI->dropPoisonGeneratingFlags();
  }
  return replaceInstUsesWith(Shuf, NewBO);
}
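// Worked example of the X == Y path above (illustrative values only): both
// sides are 'mul' by a constant of the same variable, so the shuffle just
// selects constant lanes.
//   %b0 = mul <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
//   %b1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
//   %s = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// becomes a single binop on the shuffled constant:
//   %s = mul <4 x i32> %x, <i32 1, i32 6, i32 3, i32 8>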
/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
/// Example (little endian):
/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
                                     bool IsBigEndian) {
  // This must be a bitcasted shuffle of 1 vector integer operand.
  Type *DestType = Shuf.getType();
  Value *X;
  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
      !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
    return nullptr;

  // The source type must have the same number of elements as the shuffle,
  // and the source element type must be larger than the shuffle element type.
  Type *SrcType = X->getType();
  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
      cast<FixedVectorType>(SrcType)->getNumElements() !=
          cast<FixedVectorType>(DestType)->getNumElements() ||
      SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
    return nullptr;

  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
         "Expected a shuffle that decreases length");

  // Last, check that the mask chooses the correct low bits for each narrow
  // element in the result.
  uint64_t TruncRatio =
      SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] == PoisonMaskElem)
      continue;
    uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
    assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
    if (Mask[i] != (int)LSBIndex)
      return nullptr;
  }

  return new TruncInst(X, DestType);
}
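// Big-endian counterpart of the little-endian example in the comment above
// (illustrative): the low byte of each i16 lands at the odd byte positions,
// so the matching mask is <1, 3, 5, 7>:
//   shuf (bitcast <4 x i16> X to <8 x i8>), <1, 3, 5, 7> --> trunc X to <4 x i8>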
/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
/// narrowing (concatenating with poison and extracting back to the original
/// length). This allows replacing the wide select with a narrow select.
static Instruction *narrowVectorSelectArg(ShuffleVectorInst &Shuf,
                                          InstCombiner::BuilderTy &Builder) {
  // This must be a narrowing identity shuffle. It extracts the 1st N elements
  // of the 1st vector operand of a shuffle.
  if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
    return nullptr;

  // The vector being shuffled must be a vector select that we can eliminate.
  // TODO: The one-use requirement could be eased if X and/or Y are constants.
  Value *Cond, *X, *Y;
  if (!match(Shuf.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
    return nullptr;

  // We need a narrow condition value. It must be extended with poison elements
  // and have the same number of elements as this shuffle.
  unsigned NarrowNumElts =
      cast<FixedVectorType>(Shuf.getType())->getNumElements();
  Value *NarrowCond;
  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
      cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
          NarrowNumElts ||
      !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
    return nullptr;

  // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
  // -->
  // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}

/// Canonicalize FP negate/abs after shuffle.
static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
                                          InstCombiner::BuilderTy &Builder) {
  auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
  Value *X;
  if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
    return nullptr;

  bool IsFNeg = S0->getOpcode() == Instruction::FNeg;

  // Match 1-input (unary) shuffle.
  // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
  if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
    Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
    if (IsFNeg)
      return UnaryOperator::CreateFNegFMF(NewShuf, S0);

    Function *FAbs = Intrinsic::getOrInsertDeclaration(
        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
    CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
    NewF->setFastMathFlags(S0->getFastMathFlags());
    return NewF;
  }

  // Match 2-input (binary) shuffle.
  auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
  Value *Y;
  if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
      S0->getOpcode() != S1->getOpcode() ||
      (!S0->hasOneUse() && !S1->hasOneUse()))
    return nullptr;

  // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
  Instruction *NewF;
  if (IsFNeg) {
    NewF = UnaryOperator::CreateFNeg(NewShuf);
  } else {
    Function *FAbs = Intrinsic::getOrInsertDeclaration(
        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
    NewF = CallInst::Create(FAbs, {NewShuf});
  }
  NewF->copyIRFlags(S0);
  NewF->andIRFlags(S1);
  return NewF;
}
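// Worked example of the 2-input case above (illustrative values only):
//   %n0 = fneg <4 x float> %x
//   %n1 = fneg <4 x float> %y
//   %s = shufflevector <4 x float> %n0, <4 x float> %n1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes one fneg after the shuffle:
//   %s2 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s = fneg <4 x float> %s2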
/// Canonicalize casts after shuffle.
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
                                    InstCombiner::BuilderTy &Builder) {
  // Do we have 2 matching cast operands?
  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
  if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
      Cast0->getSrcTy() != Cast1->getSrcTy())
    return nullptr;

  // TODO: Allow other opcodes? That would require easing the type restrictions
  //       below here.
  CastInst::CastOps CastOpcode = Cast0->getOpcode();
  switch (CastOpcode) {
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break;
  default:
    return nullptr;
  }

  VectorType *ShufTy = Shuf.getType();
  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());

  // TODO: Allow length-increasing shuffles?
  if (ShufTy->getElementCount().getKnownMinValue() >
      ShufOpTy->getElementCount().getKnownMinValue())
    return nullptr;

  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
         "Expected fixed vector operands for casts and binary shuffle");
  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
    return nullptr;

  // At least one of the operands must have only one use (the shuffle).
  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
    return nullptr;

  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
  Value *X = Cast0->getOperand(0);
  Value *Y = Cast1->getOperand(0);
  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
}
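// Worked example of the cast canonicalization above (illustrative values):
//   %c0 = sitofp <4 x i32> %x to <4 x float>
//   %c1 = sitofp <4 x i32> %y to <4 x float>
//   %s = shufflevector <4 x float> %c0, <4 x float> %c1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// becomes one cast after the shuffle:
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
//   %s = sitofp <4 x i32> %s2 to <4 x float>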
/// Try to fold an extract subvector operation.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
    return nullptr;

  // Check if we are extracting all bits of an inserted scalar:
  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
  Value *X;
  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
      X->getType()->getPrimitiveSizeInBits() ==
          Shuf.getType()->getPrimitiveSizeInBits())
    return new BitCastInst(X, Shuf.getType());

  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
  Value *Y;
  ArrayRef<int> Mask;
  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
    return nullptr;

  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
  // then combining may result in worse codegen.
  if (!Op0->hasOneUse())
    return nullptr;

  // We are extracting a subvector from a shuffle. Remove excess elements from
  // the 1st shuffle mask to eliminate the extract.
  //
  // This transform is conservatively limited to identity extracts because we
  // do not allow arbitrary shuffle mask creation as a target-independent
  // transform (because we can't guarantee that will lower efficiently).
  //
  // If the extracting shuffle has a poison mask element, it transfers to the
  // new shuffle mask. Otherwise, copy the original mask element. Example:
  // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
  // shuf X, Y, <C0, poison, C2, poison>
  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumElts);
  assert(NumElts < Mask.size() &&
         "Identity with extract must have less elements than its inputs");

  for (unsigned i = 0; i != NumElts; ++i) {
    int ExtractMaskElt = Shuf.getMaskValue(i);
    int MaskElt = Mask[i];
    NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
  }
  return new ShuffleVectorInst(X, Y, NewMask);
}
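// Concrete instance of the comment's example above (illustrative values):
//   %inner = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %sub = shufflevector <4 x i32> %inner, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// The identity extract is absorbed by shortening the first mask:
//   %sub = shufflevector <4 x i32> %x, <4 x i32> %y, <2 x i32> <i32 0, i32 5>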
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
                                          InstCombinerImpl &IC) {
  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
  SmallVector<int, 16> Mask;
  Shuf.getShuffleMask(Mask);

  int NumElts = Mask.size();
  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();

  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
  // not be able to handle it there if the insertelement has >1 use.
  // If the shuffle has an insertelement operand but does not choose the
  // inserted scalar element from that value, then we can replace that shuffle
  // operand with the source vector of the insertelement.
  Value *X;
  uint64_t IdxC;
  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
    // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
    if (!is_contained(Mask, (int)IdxC))
      return IC.replaceOperand(Shuf, 0, X);
  }
  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
    // Offset the index constant by the vector width because we are checking
    // for accesses to the 2nd vector input of the shuffle.
    IdxC += InpNumElts;
    // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
    if (!is_contained(Mask, (int)IdxC))
      return IC.replaceOperand(Shuf, 1, X);
  }
  // For the rest of the transform, the shuffle must not change vector sizes.
  // TODO: This restriction could be removed if the insert has only one use
  //       (because the transform would require a new length-changing shuffle).
  if (NumElts != InpNumElts)
    return nullptr;

  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
    // We need an insertelement with a constant index.
    if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
                               m_ConstantInt(IndexC))))
      return false;

    // Test the shuffle mask to see if it splices the inserted scalar into the
    // operand 1 vector of the shuffle.
    int NewInsIndex = -1;
    for (int i = 0; i != NumElts; ++i) {
      // Ignore undef mask elements.
      if (Mask[i] == -1)
        continue;

      // The shuffle takes elements of operand 1 without lane changes.
      if (Mask[i] == NumElts + i)
        continue;

      // The shuffle must choose the inserted scalar exactly once.
      if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
        return false;

      // The shuffle is placing the inserted scalar into element i.
      NewInsIndex = i;
    }

    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");

    // Index is updated to the potentially translated insertion lane.
    IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
    return true;
  };

  // If the shuffle is unnecessary, insert the scalar operand directly into
  // operand 1 of the shuffle. Example:
  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
  Value *Scalar;
  ConstantInt *IndexC;
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
    return InsertElementInst::Create(V1, Scalar, IndexC);

  // Try again after commuting shuffle. Example:
  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
  std::swap(V0, V1);
  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
    return InsertElementInst::Create(V1, Scalar, IndexC);

  return nullptr;
}
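// Concrete instance of the first example above (illustrative values): mask
// element 0 selects the scalar inserted at index 1, and elements 5..7 take
// lanes 1..3 of %v1 unchanged, so the whole shuffle is one insert:
//   %ins = insertelement <4 x i32> %a, i32 %s, i64 1
//   %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
// becomes
//   %shuf = insertelement <4 x i32> %v1, i32 %s, i64 0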
static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
  // Match the operands as identity with padding (also known as concatenation
  // with undef) shuffles of the same source type. The backend is expected to
  // recreate these concatenations from a shuffle of narrow operands.
  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
    return nullptr;

  // We limit this transform to power-of-2 types because we expect that the
  // backend can convert the simplified IR patterns to identical nodes as the
  // original IR.
  // TODO: If we can verify the same behavior for arbitrary types, the
  //       power-of-2 checks can be removed.
  Value *X = Shuffle0->getOperand(0);
  Value *Y = Shuffle1->getOperand(0);
  if (X->getType() != Y->getType() ||
      !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
      !isPowerOf2_32(
          cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
      !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
      match(X, m_Undef()) || match(Y, m_Undef()))
    return nullptr;
  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
         isa<UndefValue>(Shuffle1->getOperand(1)) &&
         "Unexpected operand for identity shuffle");

  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
  // operands directly by adjusting the shuffle mask to account for the
  // narrower types:
  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");

  ArrayRef<int> Mask = Shuf.getShuffleMask();
  SmallVector<int, 16> NewMask(Mask.size(), -1);
  for (int i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] == -1)
      continue;

    // If this shuffle is choosing an undef element from 1 of the sources, that
    // element is undef.
    if (Mask[i] < WideElts) {
      if (Shuffle0->getMaskValue(Mask[i]) == -1)
        continue;
    } else {
      if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
        continue;
    }

    // If this shuffle is choosing from the 1st narrow op, the mask element is
    // the same. If this shuffle is choosing from the 2nd narrow op, the mask
    // element is offset down to adjust for the narrow vector widths.
    if (Mask[i] < WideElts) {
      assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
      NewMask[i] = Mask[i];
    } else {
      assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
      NewMask[i] = Mask[i] - (WideElts - NarrowElts);
    }
  }
  return new ShuffleVectorInst(X, Y, NewMask);
}
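// Worked example of the identity-with-padding fold above (illustrative):
//   %w0 = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %w1 = shufflevector <2 x i32> %y, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %s = shufflevector <4 x i32> %w0, <4 x i32> %w1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// With WideElts = 4 and NarrowElts = 2, mask elements 4 and 5 are offset down
// by 2, so the widening shuffles disappear:
//   %s = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>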
// Splatting the first element of the result of a BinOp, where any of the
// BinOp's operands are the result of a first element splat, can be simplified
// to splatting the first element of the result of the BinOp.
Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
  if (!match(SVI.getOperand(1), m_Poison()) ||
      !match(SVI.getShuffleMask(), m_ZeroMask()) ||
      !SVI.getOperand(0)->hasOneUse())
    return nullptr;

  Value *Op0 = SVI.getOperand(0);
  Value *X, *Y;
  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
                          m_Value(Y))) &&
      !match(Op0, m_BinOp(m_Value(X),
                          m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
    return nullptr;
  if (X->getType() != Y->getType())
    return nullptr;

  auto *BinOp = cast<BinaryOperator>(Op0);
  if (!isSafeToSpeculativelyExecute(BinOp))
    return nullptr;

  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
  if (auto NewBOI = dyn_cast<Instruction>(NewBO))
    NewBOI->copyIRFlags(BinOp);

  return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
}
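// Worked example of the splat-of-binop simplification above (illustrative):
//   %xs = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %add = add <4 x i32> %xs, %y
//   %s = shufflevector <4 x i32> %add, <4 x i32> poison, <4 x i32> zeroinitializer
// Lane 0 of %add depends only on lane 0 of %x and %y, so this becomes:
//   %add2 = add <4 x i32> %x, %y
//   %s = shufflevector <4 x i32> %add2, <4 x i32> poison, <4 x i32> zeroinitializer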
Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  Value *LHS = SVI.getOperand(0);
  Value *RHS = SVI.getOperand(1);
  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
                                          SVI.getType(), ShufQuery))
    return replaceInstUsesWith(SVI, V);

  if (Instruction *I = simplifyBinOpSplats(SVI))
    return I;

  // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
  // order to support scalable vectors.
  ArrayRef<int> Mask = SVI.getShuffleMask();
  if (!isa<PoisonValue>(RHS) &&
      all_of(Mask, [](int M) { return M == 0 || M == PoisonMaskElem; }))
    return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));

  if (isa<ScalableVectorType>(SVI.getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();

  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
  //
  // if X and Y are of the same (vector) type, and the element size is not
  // changed by the bitcasts, we can distribute the bitcasts through the
  // shuffle, hopefully reducing the number of instructions. We make sure that
  // at least one bitcast only has one use, so we don't *increase* the number
  // of instructions here.
  Value *X, *Y;
  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
      X->getType()->isVectorTy() && X->getType() == Y->getType() &&
      X->getType()->getScalarSizeInBits() ==
          SVI.getType()->getScalarSizeInBits() &&
      (LHS->hasOneUse() || RHS->hasOneUse())) {
    Value *V = Builder.CreateShuffleVector(X, Y, Mask, SVI.getName() + ".bc");
    return new BitCastInst(V, SVI.getType());
  }

  // Peek through a bitcasted shuffle operand by scaling the mask. If the
  // simulated shuffle can simplify, then this shuffle is unnecessary:
  // shuf (bitcast X), undef, Mask --> bitcast X'
  // TODO: This could be extended to allow length-changing shuffles.
  //       The transform might also be obsoleted if we allowed canonicalization
  //       of bitcasted shuffles.
  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
      X->getType()->isVectorTy() && VWidth == LHSWidth) {
    // Try to create a scaled mask constant.
    auto *XType = cast<FixedVectorType>(X->getType());
    unsigned XNumElts = XType->getNumElements();
    SmallVector<int, 16> ScaledMask;
    if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
      // If the shuffled source vector simplifies, cast that value to this
      // shuffle's type.
      if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
                                              ScaledMask, XType, ShufQuery))
        return new BitCastInst(V, SVI.getType());
    }
  }

  // shuffle x, x, mask --> shuffle x, undef, mask'
  if (LHS == RHS) {
    assert(!match(RHS, m_Undef()) &&
           "Shuffle with 2 undef ops not simplified?");
    return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
  }

  // shuffle undef, x, mask --> shuffle x, undef, mask'
  if (match(LHS, m_Undef())) {
    SVI.commute();
    return &SVI;
  }

  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
    return I;

  if (Instruction *I = foldSelectShuffle(SVI))
    return I;

  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
    return I;

  if (Instruction *I = narrowVectorSelectArg(SVI, Builder))
    return I;

  if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
    return I;

  if (Instruction *I = foldCastShuffle(SVI, Builder))
    return I;

  APInt PoisonElts(VWidth, 0);
  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
    if (V != &SVI)
      return replaceInstUsesWith(SVI, V);
    return &SVI;
  }

  if (Instruction *I = foldIdentityExtractShuffle(SVI))
    return I;

  // These transforms have the potential to lose undef knowledge, so they are
  // intentionally placed after SimplifyDemandedVectorElts().
  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
    return I;
  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
    return I;

  if (auto *SI = dyn_cast<SelectInst>(LHS)) {
    // We cannot do this fold for elementwise select since ShuffleVector is
    // not elementwise.
    if (SI->getCondition()->getType()->isIntegerTy() &&
        (isa<PoisonValue>(RHS) ||
         isGuaranteedNotToBePoison(SI->getCondition())))
      if (Instruction *I = FoldOpIntoSelect(SVI, SI))
        return I;
  }
  if (auto *PN = dyn_cast<PHINode>(LHS)) {
    if (Instruction *I = foldOpIntoPhi(SVI, PN))
      return I;
  }
  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
  // a non-vector type. We can instead bitcast the original vector followed by
  // an extract of the desired element:
  //
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %1 = bitcast <4 x i8> %sroa to i32
  // Becomes:
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 0
  //
  // If the shuffle is extracting a contiguous range of values from the input
  // vector then each use which is a bitcast of the extracted size can be
  // replaced. This will work if the vector types are compatible, and the begin
  // index is aligned to a value in the casted vector type. If the begin index
  // isn't aligned then we can shuffle the original vector (keeping the same
  // vector type) before extracting.
  //
  // This code will bail out if the target type is fundamentally incompatible
  // with vectors of the source type.
  //
  // Example of <16 x i8>, target type i32:
  // Index range [4,8):         v-----------v Will work.
  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
  //     <4 x i32>: |           |           |           |           |
  //                +-----------+-----------+-----------+-----------+
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
  // Target type i40:           ^--------------^ Won't work, bail.
  bool MadeChange = false;
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
    Value *V = LHS;
    unsigned MaskElems = Mask.size();
    auto *SrcTy = cast<FixedVectorType>(V->getType());
    unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
    unsigned SrcNumElems = SrcTy->getNumElements();
    SmallVector<BitCastInst *, 8> BCs;
    DenseMap<Type *, Value *> NewBCs;
    for (User *U : SVI.users())
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
        if (!BC->use_empty())
          // Only visit bitcasts that weren't previously handled.
          BCs.push_back(BC);
    for (BitCastInst *BC : BCs) {
      unsigned BegIdx = Mask.front();
      Type *TgtTy = BC->getDestTy();
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
      if (!TgtElemBitWidth)
        continue;
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
      if (!VecBitWidthsEqual)
        continue;
      if (!VectorType::isValidElementType(TgtTy))
        continue;
      auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
      if (!BegIsAligned) {
        // Shuffle the input so [0,NumElements) contains the output, and
        // [NumElems,SrcNumElems) is undef.
        SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
          ShuffleMask[I] = Idx;
        V = Builder.CreateShuffleVector(V, ShuffleMask,
                                        SVI.getName() + ".extract");
        BegIdx = 0;
      }
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
      assert(SrcElemsPerTgtElem);
      BegIdx /= SrcElemsPerTgtElem;
      auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
      if (Inserted)
        It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
      Value *Ext = Builder.CreateExtractElement(It->second, BegIdx,
                                                SVI.getName() + ".extract");
      // The shufflevector isn't being replaced: the bitcast that used it
      // is. InstCombine will visit the newly-created instructions.
      replaceInstUsesWith(*BC, Ext);
      MadeChange = true;
    }
  }

  // If the LHS is a shufflevector itself, see if we can combine it with this
  // one without producing an unusual shuffle.
  // Cases that might be simplified:
  // 1.
  // x1=shuffle(v1,v2,mask1)
  //  x=shuffle(x1,undef,mask)
  //        ==>
  //  x=shuffle(v1,undef,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  // 2.
  // x1=shuffle(v1,undef,mask1)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == mask1.size()
  //        ==>
  //  x=shuffle(v1,x2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  // 3.
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v2.size() == mask2.size()
  //        ==>
  //  x=shuffle(x1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  // 4.
  // x1=shuffle(v1,undef,mask1)
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == v2.size()
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  //
  // Here we are really conservative:
  // we are absolutely afraid of producing a shuffle mask not in the input
  // program, because the code gen may not be smart enough to turn a merged
  // shuffle into two specific shuffles: it may produce worse code. As such,
  // we only merge two shuffles if the result is either a splat or one of the
  // input shuffle masks. In this case, merging the shuffles just removes
  // one instruction, which we know is safe. This is good for things like
  // turning: (splat(splat)) -> splat, or
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
  ShuffleVectorInst *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  ShuffleVectorInst *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
  if (LHSShuffle)
    if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
        !match(RHS, m_Poison()))
      LHSShuffle = nullptr;
  if (RHSShuffle)
    if (!match(RHSShuffle->getOperand(1), m_Poison()))
      RHSShuffle = nullptr;
  if (!LHSShuffle && !RHSShuffle)
    return MadeChange ? &SVI : nullptr;
  Value *LHSOp0 = nullptr;
  Value *LHSOp1 = nullptr;
  Value *RHSOp0 = nullptr;
  unsigned LHSOp0Width = 0;
  unsigned RHSOp0Width = 0;
  if (LHSShuffle) {
    LHSOp0 = LHSShuffle->getOperand(0);
    LHSOp1 = LHSShuffle->getOperand(1);
    LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
  }
  if (RHSShuffle) {
    RHSOp0 = RHSShuffle->getOperand(0);
    RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
  }
  Value *newLHS = LHS;
  Value *newRHS = RHS;
  if (LHSShuffle) {
    // case 1
    if (match(RHS, m_Poison())) {
      newLHS = LHSOp0;
      newRHS = LHSOp1;
    }
    // case 2 or 4
    else if (LHSOp0Width == LHSWidth) {
      newLHS = LHSOp0;
    }
  }
  // case 3 or 4
  if (RHSShuffle && RHSOp0Width == LHSWidth) {
    newRHS = RHSOp0;
  }
  // case 4
  if (LHSOp0 == RHSOp0) {
    newLHS = LHSOp0;
    newRHS = nullptr;
  }

  if (newLHS == LHS && newRHS == RHS)
    return MadeChange ? &SVI : nullptr;

  SmallVector<int, 16> LHSMask;
  SmallVector<int, 16> RHSMask;
  if (newLHS != LHS)
    LHSShuffle->getShuffleMask(LHSMask);
  if (RHSShuffle && newRHS != RHS)
    RHSShuffle->getShuffleMask(RHSMask);

  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  SmallVector<int, 16> newMask;
  bool isSplat = true;
  int SplatElt = -1;
  // Create a new mask for the new ShuffleVectorInst so that the new
  // ShuffleVectorInst is equivalent to the original one.
  for (unsigned i = 0; i < VWidth; ++i) {
    int eltMask;
    if (Mask[i] < 0) {
      // This element is a poison value.
      eltMask = -1;
    } else if (Mask[i] < (int)LHSWidth) {
      // This element is from left hand side vector operand.
      //
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
      // new mask value for the element.
      if (newLHS != LHS) {
        eltMask = LHSMask[Mask[i]];
        // If the value selected is a poison value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
          eltMask = -1;
      } else
        eltMask = Mask[i];
    } else {
      // This element is from right hand side vector operand.
      //
      // If the value selected is a poison value, explicitly specify it
      // with a -1 mask value. (case 1)
      if (match(RHS, m_Poison()))
        eltMask = -1;
      // If RHS is going to be replaced (case 3 or 4), calculate the
      // new mask value for the element.
      else if (newRHS != RHS) {
        eltMask = RHSMask[Mask[i] - LHSWidth];
        // If the value selected is a poison value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)RHSOp0Width) {
          assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
                 "should have been check above");
          eltMask = -1;
        }
      } else
        eltMask = Mask[i] - LHSWidth;

      // If LHS's width is changed, shift the mask value accordingly.
      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
      // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
      // obfuscation across the two vectors.
      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
        eltMask += newLHSWidth;
    }

    // Check if this could still be a splat.
    if (eltMask >= 0) {
      if (SplatElt >= 0 && SplatElt != eltMask)
        isSplat = false;
      SplatElt = eltMask;
    }

    newMask.push_back(eltMask);
  }

  // If the result mask is equal to one of the original shuffle masks,
  // or is a splat, do the replacement.
  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    if (!newRHS)
      newRHS = PoisonValue::get(newLHS->getType());
    return new ShuffleVectorInst(newLHS, newRHS, newMask);
  }

  return MadeChange ? &SVI : nullptr;
}
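// Worked example of the conservative merge above, the splat(splat) case from
// the comment (illustrative values):
//   %x1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %x = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// The merged mask <3,3,3,3> is itself a splat, so the pair is replaced:
//   %x = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>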