Movatterモバイル変換

Go to the documentation of this file.

1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// This file defines vectorizer utilities.

10//

11//===----------------------------------------------------------------------===//

13#include "llvm/Analysis/VectorUtils.h"

14#include "llvm/ADT/EquivalenceClasses.h"

15#include "llvm/ADT/SmallVector.h"

16#include "llvm/Analysis/DemandedBits.h"

17#include "llvm/Analysis/LoopInfo.h"

18#include "llvm/Analysis/LoopIterator.h"

19#include "llvm/Analysis/ScalarEvolution.h"

20#include "llvm/Analysis/ScalarEvolutionExpressions.h"

21#include "llvm/Analysis/TargetTransformInfo.h"

22#include "llvm/Analysis/ValueTracking.h"

23#include "llvm/IR/Constants.h"

24#include "llvm/IR/DerivedTypes.h"

25#include "llvm/IR/IRBuilder.h"

26#include "llvm/IR/MemoryModelRelaxationAnnotations.h"

27#include "llvm/IR/PatternMatch.h"

28#include "llvm/IR/Value.h"

29#include "llvm/Support/CommandLine.h"

31#define DEBUG_TYPE "vectorutils"

33using namespacellvm;

34using namespacellvm::PatternMatch;

36/// Maximum factor for an interleaved memory access.

37staticcl::opt<unsigned>MaxInterleaveGroupFactor(

38"max-interleave-group-factor",cl::Hidden,

39cl::desc("Maximum factor for an interleaved access group (default = 8)"),

40cl::init(8));

42/// Return true if all of the intrinsic's arguments and return type are scalars

43/// for the scalar form of the intrinsic, and vectors for the vector form of the

44/// intrinsic (except operands that are marked as always being scalar by

45/// isVectorIntrinsicWithScalarOpAtArg).

46boolllvm::isTriviallyVectorizable(Intrinsic::ID ID) {

47switch (ID) {

48case Intrinsic::abs:// Begin integer bit-manipulation.

49case Intrinsic::bswap:

50case Intrinsic::bitreverse:

51case Intrinsic::ctpop:

52case Intrinsic::ctlz:

53case Intrinsic::cttz:

54case Intrinsic::fshl:

55case Intrinsic::fshr:

56case Intrinsic::smax:

57case Intrinsic::smin:

58case Intrinsic::umax:

59case Intrinsic::umin:

60case Intrinsic::sadd_sat:

61case Intrinsic::ssub_sat:

62case Intrinsic::uadd_sat:

63case Intrinsic::usub_sat:

64case Intrinsic::smul_fix:

65case Intrinsic::smul_fix_sat:

66case Intrinsic::umul_fix:

67case Intrinsic::umul_fix_sat:

68case Intrinsic::sqrt:// Begin floating-point.

69case Intrinsic::asin:

70case Intrinsic::acos:

71case Intrinsic::atan:

72case Intrinsic::atan2:

73case Intrinsic::sin:

74case Intrinsic::cos:

75case Intrinsic::tan:

76case Intrinsic::sinh:

77case Intrinsic::cosh:

78case Intrinsic::tanh:

79case Intrinsic::exp:

80case Intrinsic::exp10:

81case Intrinsic::exp2:

82case Intrinsic::log:

83case Intrinsic::log10:

84case Intrinsic::log2:

85case Intrinsic::fabs:

86case Intrinsic::minnum:

87case Intrinsic::maxnum:

88case Intrinsic::minimum:

89case Intrinsic::maximum:

90case Intrinsic::copysign:

91case Intrinsic::floor:

92case Intrinsic::ceil:

93case Intrinsic::trunc:

94case Intrinsic::rint:

95case Intrinsic::nearbyint:

96case Intrinsic::round:

97case Intrinsic::roundeven:

98case Intrinsic::pow:

99case Intrinsic::fma:

100case Intrinsic::fmuladd:

101case Intrinsic::is_fpclass:

102case Intrinsic::powi:

103case Intrinsic::canonicalize:

104case Intrinsic::fptosi_sat:

105case Intrinsic::fptoui_sat:

106case Intrinsic::lrint:

107case Intrinsic::llrint:

108case Intrinsic::ucmp:

109case Intrinsic::scmp:

110returntrue;

111default:

112returnfalse;

113 }

114}

115

116boolllvm::isTriviallyScalarizable(Intrinsic::ID ID,

117constTargetTransformInfo *TTI) {

118if (isTriviallyVectorizable(ID))

119returntrue;

120

121if (TTI &&Intrinsic::isTargetIntrinsic(ID))

122returnTTI->isTargetIntrinsicTriviallyScalarizable(ID);

123

124// TODO: Move frexp to isTriviallyVectorizable.

125// https://github.com/llvm/llvm-project/issues/112408

126switch (ID) {

127case Intrinsic::frexp:

128returntrue;

129 }

130returnfalse;

131}

132

133/// Identifies if the vector form of the intrinsic has a scalar operand.

134boolllvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,

135unsigned ScalarOpdIdx,

136constTargetTransformInfo *TTI) {

137

138if (TTI &&Intrinsic::isTargetIntrinsic(ID))

139returnTTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);

140

141switch (ID) {

142case Intrinsic::abs:

143case Intrinsic::vp_abs:

144case Intrinsic::ctlz:

145case Intrinsic::vp_ctlz:

146case Intrinsic::cttz:

147case Intrinsic::vp_cttz:

148case Intrinsic::is_fpclass:

149case Intrinsic::vp_is_fpclass:

150case Intrinsic::powi:

151return (ScalarOpdIdx == 1);

152case Intrinsic::smul_fix:

153case Intrinsic::smul_fix_sat:

154case Intrinsic::umul_fix:

155case Intrinsic::umul_fix_sat:

156return (ScalarOpdIdx == 2);

157default:

158returnfalse;

159 }

160}

161

162boolllvm::isVectorIntrinsicWithOverloadTypeAtArg(

163Intrinsic::ID ID,int OpdIdx,constTargetTransformInfo *TTI) {

164assert(ID !=Intrinsic::not_intrinsic &&"Not an intrinsic!");

165

166if (TTI &&Intrinsic::isTargetIntrinsic(ID))

167returnTTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);

168

169if (VPCastIntrinsic::isVPCast(ID))

170return OpdIdx == -1 || OpdIdx == 0;

171

172switch (ID) {

173case Intrinsic::fptosi_sat:

174case Intrinsic::fptoui_sat:

175case Intrinsic::lrint:

176case Intrinsic::llrint:

177case Intrinsic::vp_lrint:

178case Intrinsic::vp_llrint:

179case Intrinsic::ucmp:

180case Intrinsic::scmp:

181return OpdIdx == -1 || OpdIdx == 0;

182case Intrinsic::is_fpclass:

183case Intrinsic::vp_is_fpclass:

184return OpdIdx == 0;

185case Intrinsic::powi:

186return OpdIdx == -1 || OpdIdx == 1;

187default:

188return OpdIdx == -1;

189 }

190}

191

192boolllvm::isVectorIntrinsicWithStructReturnOverloadAtField(

193Intrinsic::ID ID,int RetIdx,constTargetTransformInfo *TTI) {

194

195if (TTI &&Intrinsic::isTargetIntrinsic(ID))

196returnTTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

197

198switch (ID) {

199case Intrinsic::frexp:

200return RetIdx == 0 || RetIdx == 1;

201default:

202return RetIdx == 0;

203 }

204}

205

206/// Returns intrinsic ID for call.

207/// For the input call instruction it finds mapping intrinsic and returns

208/// its ID, in case it does not found it return not_intrinsic.

209Intrinsic::ID llvm::getVectorIntrinsicIDForCall(constCallInst *CI,

210constTargetLibraryInfo *TLI) {

211Intrinsic::ID ID =getIntrinsicForCallSite(*CI, TLI);

212if (ID ==Intrinsic::not_intrinsic)

213returnIntrinsic::not_intrinsic;

214

215if (isTriviallyVectorizable(ID) ||ID == Intrinsic::lifetime_start ||

216ID == Intrinsic::lifetime_end ||ID == Intrinsic::assume ||

217ID == Intrinsic::experimental_noalias_scope_decl ||

218ID == Intrinsic::sideeffect ||ID == Intrinsic::pseudoprobe)

219returnID;

220returnIntrinsic::not_intrinsic;

221}

222

223/// Given a vector and an element number, see if the scalar value is

224/// already around as a register, for example if it were inserted then extracted

225/// from the vector.

226Value *llvm::findScalarElement(Value *V,unsigned EltNo) {

227assert(V->getType()->isVectorTy() &&"Not looking at a vector?");

228VectorType *VTy = cast<VectorType>(V->getType());

229// For fixed-length vector, return poison for out of range access.

230if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {

231unsigned Width = FVTy->getNumElements();

232if (EltNo >= Width)

233returnPoisonValue::get(FVTy->getElementType());

234 }

235

236if (Constant *C = dyn_cast<Constant>(V))

237returnC->getAggregateElement(EltNo);

238

239if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {

240// If this is an insert to a variable element, we don't know what it is.

241if (!isa<ConstantInt>(III->getOperand(2)))

242returnnullptr;

243unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

244

245// If this is an insert to the element we are looking for, return the

246// inserted value.

247if (EltNo == IIElt)

248return III->getOperand(1);

249

250// Guard against infinite loop on malformed, unreachable IR.

251if (III == III->getOperand(0))

252returnnullptr;

253

254// Otherwise, the insertelement doesn't modify the value, recurse on its

255// vector input.

256returnfindScalarElement(III->getOperand(0), EltNo);

257 }

258

259ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);

260// Restrict the following transformation to fixed-length vector.

261if (SVI && isa<FixedVectorType>(SVI->getType())) {

262unsigned LHSWidth =

263 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();

264int InEl = SVI->getMaskValue(EltNo);

265if (InEl < 0)

266returnPoisonValue::get(VTy->getElementType());

267if (InEl < (int)LHSWidth)

268returnfindScalarElement(SVI->getOperand(0), InEl);

269returnfindScalarElement(SVI->getOperand(1), InEl - LHSWidth);

270 }

271

272// Extract a value from a vector add operation with a constant zero.

273// TODO: Use getBinOpIdentity() to generalize this.

274Value *Val;Constant *C;

275if (match(V,m_Add(m_Value(Val),m_Constant(C))))

276if (Constant *Elt =C->getAggregateElement(EltNo))

277if (Elt->isNullValue())

278returnfindScalarElement(Val, EltNo);

279

280// If the vector is a splat then we can trivially find the scalar element.

281if (isa<ScalableVectorType>(VTy))

282if (Value *Splat =getSplatValue(V))

283if (EltNo < VTy->getElementCount().getKnownMinValue())

284returnSplat;

285

286// Otherwise, we don't know.

287returnnullptr;

288}

289

290intllvm::getSplatIndex(ArrayRef<int> Mask) {

291int SplatIndex = -1;

292for (int M : Mask) {

293// Ignore invalid (undefined) mask elements.

294if (M < 0)

295continue;

296

297// There can be only 1 non-negative mask element value if this is a splat.

298if (SplatIndex != -1 && SplatIndex != M)

299return -1;

300

301// Initialize the splat index to the 1st non-negative mask element.

302 SplatIndex = M;

303 }

304assert((SplatIndex == -1 || SplatIndex >= 0) &&"Negative index?");

305return SplatIndex;

306}

307

308/// Get splat value if the input is a splat vector or return nullptr.

309/// This function is not fully general. It checks only 2 cases:

310/// the input value is (1) a splat constant vector or (2) a sequence

311/// of instructions that broadcasts a scalar at element 0.

312Value *llvm::getSplatValue(constValue *V) {

313if (isa<VectorType>(V->getType()))

314if (auto *C = dyn_cast<Constant>(V))

315returnC->getSplatValue();

316

317// shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>

318Value *Splat;

319if (match(V,

320m_Shuffle(m_InsertElt(m_Value(),m_Value(Splat),m_ZeroInt()),

321m_Value(),m_ZeroMask())))

322returnSplat;

323

324returnnullptr;

325}

326

327boolllvm::isSplatValue(constValue *V,int Index,unsignedDepth) {

328assert(Depth <=MaxAnalysisRecursionDepth &&"Limit Search Depth");

329

330if (isa<VectorType>(V->getType())) {

331if (isa<UndefValue>(V))

332returntrue;

333// FIXME: We can allow undefs, but if Index was specified, we may want to

334// check that the constant is defined at that index.

335if (auto *C = dyn_cast<Constant>(V))

336returnC->getSplatValue() !=nullptr;

337 }

338

339if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {

340// FIXME: We can safely allow undefs here. If Index was specified, we will

341// check that the mask elt is defined at the required index.

342if (!all_equal(Shuf->getShuffleMask()))

343returnfalse;

344

345// Match any index.

346if (Index == -1)

347returntrue;

348

349// Match a specific element. The mask should be defined at and match the

350// specified index.

351return Shuf->getMaskValue(Index) == Index;

352 }

353

354// The remaining tests are all recursive, so bail out if we hit the limit.

355if (Depth++ ==MaxAnalysisRecursionDepth)

356returnfalse;

357

358// If both operands of a binop are splats, the result is a splat.

359Value *X, *Y, *Z;

360if (match(V,m_BinOp(m_Value(X),m_Value(Y))))

361returnisSplatValue(X, Index,Depth) &&isSplatValue(Y, Index,Depth);

362

363// If all operands of a select are splats, the result is a splat.

364if (match(V,m_Select(m_Value(X),m_Value(Y),m_Value(Z))))

365returnisSplatValue(X, Index,Depth) &&isSplatValue(Y, Index,Depth) &&

366isSplatValue(Z, Index,Depth);

367

368// TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).

369

370returnfalse;

371}

372

373boolllvm::getShuffleDemandedElts(int SrcWidth,ArrayRef<int> Mask,

374constAPInt &DemandedElts,APInt &DemandedLHS,

375APInt &DemandedRHS,bool AllowUndefElts) {

376 DemandedLHS = DemandedRHS =APInt::getZero(SrcWidth);

377

378// Early out if we don't demand any elements.

379if (DemandedElts.isZero())

380returntrue;

381

382// Simple case of a shuffle with zeroinitializer.

383if (all_of(Mask, [](int Elt) {return Elt == 0; })) {

384 DemandedLHS.setBit(0);

385returntrue;

386 }

387

388for (unsignedI = 0, E = Mask.size();I != E; ++I) {

389int M = Mask[I];

390assert((-1 <= M) && (M < (SrcWidth * 2)) &&

391"Invalid shuffle mask constant");

392

393if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))

394continue;

395

396// For undef elements, we don't know anything about the common state of

397// the shuffle result.

398if (M < 0)

399returnfalse;

400

401if (M < SrcWidth)

402 DemandedLHS.setBit(M);

403else

404 DemandedRHS.setBit(M - SrcWidth);

405 }

406

407returntrue;

408}

409

410voidllvm::narrowShuffleMaskElts(int Scale,ArrayRef<int> Mask,

411SmallVectorImpl<int> &ScaledMask) {

412assert(Scale > 0 &&"Unexpected scaling factor");

413

414// Fast-path: if no scaling, then it is just a copy.

415if (Scale == 1) {

416 ScaledMask.assign(Mask.begin(), Mask.end());

417return;

418 }

419

420 ScaledMask.clear();

421for (int MaskElt : Mask) {

422if (MaskElt >= 0) {

423assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&

424"Overflowed 32-bits");

425 }

426for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)

427 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);

428 }

429}

430

431boolllvm::widenShuffleMaskElts(int Scale,ArrayRef<int> Mask,

432SmallVectorImpl<int> &ScaledMask) {

433assert(Scale > 0 &&"Unexpected scaling factor");

434

435// Fast-path: if no scaling, then it is just a copy.

436if (Scale == 1) {

437 ScaledMask.assign(Mask.begin(), Mask.end());

438returntrue;

439 }

440

441// We must map the original elements down evenly to a type with less elements.

442int NumElts = Mask.size();

443if (NumElts % Scale != 0)

444returnfalse;

445

446 ScaledMask.clear();

447 ScaledMask.reserve(NumElts / Scale);

448

449// Step through the input mask by splitting into Scale-sized slices.

450do {

451ArrayRef<int> MaskSlice = Mask.take_front(Scale);

452assert((int)MaskSlice.size() == Scale &&"Expected Scale-sized slice.");

453

454// The first element of the slice determines how we evaluate this slice.

455int SliceFront = MaskSlice.front();

456if (SliceFront < 0) {

457// Negative values (undef or other "sentinel" values) must be equal across

458// the entire slice.

459if (!all_equal(MaskSlice))

460returnfalse;

461 ScaledMask.push_back(SliceFront);

462 }else {

463// A positive mask element must be cleanly divisible.

464if (SliceFront % Scale != 0)

465returnfalse;

466// Elements of the slice must be consecutive.

467for (int i = 1; i < Scale; ++i)

468if (MaskSlice[i] != SliceFront + i)

469returnfalse;

470 ScaledMask.push_back(SliceFront / Scale);

471 }

472 Mask = Mask.drop_front(Scale);

473 }while (!Mask.empty());

474

475assert((int)ScaledMask.size() * Scale == NumElts &&"Unexpected scaled mask");

476

477// All elements of the original mask can be scaled down to map to the elements

478// of a mask with wider elements.

479returntrue;

480}

481

482boolllvm::widenShuffleMaskElts(ArrayRef<int> M,

483SmallVectorImpl<int> &NewMask) {

484unsigned NumElts = M.size();

485if (NumElts % 2 != 0)

486returnfalse;

487

488 NewMask.clear();

489for (unsigned i = 0; i < NumElts; i += 2) {

490intM0 = M[i];

491intM1 = M[i + 1];

492

493// If both elements are undef, new mask is undef too.

494if (M0 == -1 &&M1 == -1) {

495 NewMask.push_back(-1);

496continue;

497 }

498

499if (M0 == -1 &&M1 != -1 && (M1 % 2) == 1) {

500 NewMask.push_back(M1 / 2);

501continue;

502 }

503

504if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) ==M1 ||M1 == -1)) {

505 NewMask.push_back(M0 / 2);

506continue;

507 }

508

509 NewMask.clear();

510returnfalse;

511 }

512

513assert(NewMask.size() == NumElts / 2 &&"Incorrect size for mask!");

514returntrue;

515}

516

517boolllvm::scaleShuffleMaskElts(unsigned NumDstElts,ArrayRef<int> Mask,

518SmallVectorImpl<int> &ScaledMask) {

519unsigned NumSrcElts = Mask.size();

520assert(NumSrcElts > 0 && NumDstElts > 0 &&"Unexpected scaling factor");

521

522// Fast-path: if no scaling, then it is just a copy.

523if (NumSrcElts == NumDstElts) {

524 ScaledMask.assign(Mask.begin(), Mask.end());

525returntrue;

526 }

527

528// Ensure we can find a whole scale factor.

529assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&

530"Unexpected scaling factor");

531

532if (NumSrcElts > NumDstElts) {

533int Scale = NumSrcElts / NumDstElts;

534returnwidenShuffleMaskElts(Scale, Mask, ScaledMask);

535 }

536

537int Scale = NumDstElts / NumSrcElts;

538narrowShuffleMaskElts(Scale, Mask, ScaledMask);

539returntrue;

540}

541

542voidllvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,

543SmallVectorImpl<int> &ScaledMask) {

544 std::array<SmallVector<int, 16>, 2> TmpMasks;

545SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];

546ArrayRef<int> InputMask = Mask;

547for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {

548while (widenShuffleMaskElts(Scale, InputMask, *Output)) {

549 InputMask = *Output;

550std::swap(Output, Tmp);

551 }

552 }

553 ScaledMask.assign(InputMask.begin(), InputMask.end());

554}

555

556voidllvm::processShuffleMasks(

557ArrayRef<int> Mask,unsigned NumOfSrcRegs,unsigned NumOfDestRegs,

558unsigned NumOfUsedRegs,function_ref<void()> NoInputAction,

559function_ref<void(ArrayRef<int>,unsigned,unsigned)> SingleInputAction,

560function_ref<void(ArrayRef<int>,unsigned,unsigned,bool)>

561 ManyInputsAction) {

562SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);

563// Try to perform better estimation of the permutation.

564// 1. Split the source/destination vectors into real registers.

565// 2. Do the mask analysis to identify which real registers are

566// permuted.

567int Sz = Mask.size();

568unsigned SzDest = Sz / NumOfDestRegs;

569unsigned SzSrc = Sz / NumOfSrcRegs;

570for (unsignedI = 0;I < NumOfDestRegs; ++I) {

571auto &RegMasks = Res[I];

572 RegMasks.assign(2 * NumOfSrcRegs, {});

573// Check that the values in dest registers are in the one src

574// register.

575for (unsigned K = 0; K < SzDest; ++K) {

576intIdx =I * SzDest + K;

577if (Idx == Sz)

578break;

579if (Mask[Idx] >= 2 * Sz || Mask[Idx] ==PoisonMaskElem)

580continue;

581int MaskIdx = Mask[Idx] % Sz;

582int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);

583// Add a cost of PermuteTwoSrc for each new source register permute,

584// if we have more than one source registers.

585if (RegMasks[SrcRegIdx].empty())

586 RegMasks[SrcRegIdx].assign(SzDest,PoisonMaskElem);

587 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;

588 }

589 }

590// Process split mask.

591for (unsignedI : seq<unsigned>(NumOfUsedRegs)) {

592auto &Dest = Res[I];

593int NumSrcRegs =

594count_if(Dest, [](ArrayRef<int> Mask) {return !Mask.empty(); });

595switch (NumSrcRegs) {

596case 0:

597// No input vectors were used!

598 NoInputAction();

599break;

600case 1: {

601// Find the only mask with at least single undef mask elem.

602auto *It =

603find_if(Dest, [](ArrayRef<int> Mask) {return !Mask.empty(); });

604unsigned SrcReg = std::distance(Dest.begin(), It);

605 SingleInputAction(*It, SrcReg,I);

606break;

607 }

608default: {

609// The first mask is a permutation of a single register. Since we have >2

610// input registers to shuffle, we merge the masks for 2 first registers

611// and generate a shuffle of 2 registers rather than the reordering of the

612// first register and then shuffle with the second register. Next,

613// generate the shuffles of the resulting register + the remaining

614// registers from the list.

615auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,

616ArrayRef<int> SecondMask) {

617for (intIdx = 0, VF = FirstMask.size();Idx < VF; ++Idx) {

618if (SecondMask[Idx] !=PoisonMaskElem) {

619assert(FirstMask[Idx] ==PoisonMaskElem &&

620"Expected undefined mask element.");

621 FirstMask[Idx] = SecondMask[Idx] + VF;

622 }

623 }

624 };

625auto &&NormalizeMask = [](MutableArrayRef<int>Mask) {

626for (intIdx = 0, VF =Mask.size();Idx < VF; ++Idx) {

627if (Mask[Idx] !=PoisonMaskElem)

628Mask[Idx] =Idx;

629 }

630 };

631int SecondIdx;

632bool NewReg =true;

633do {

634int FirstIdx = -1;

635 SecondIdx = -1;

636MutableArrayRef<int> FirstMask, SecondMask;

637for (unsignedI : seq<unsigned>(2 * NumOfSrcRegs)) {

638SmallVectorImpl<int> &RegMask = Dest[I];

639if (RegMask.empty())

640continue;

641

642if (FirstIdx == SecondIdx) {

643 FirstIdx =I;

644 FirstMask = RegMask;

645continue;

646 }

647 SecondIdx =I;

648 SecondMask = RegMask;

649 CombineMasks(FirstMask, SecondMask);

650 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);

651 NewReg =false;

652 NormalizeMask(FirstMask);

653 RegMask.clear();

654 SecondMask = FirstMask;

655 SecondIdx = FirstIdx;

656 }

657if (FirstIdx != SecondIdx && SecondIdx >= 0) {

658 CombineMasks(SecondMask, FirstMask);

659 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);

660 NewReg =false;

661 Dest[FirstIdx].clear();

662 NormalizeMask(SecondMask);

663 }

664 }while (SecondIdx >= 0);

665break;

666 }

667 }

668 }

669}

670

671voidllvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,

672constAPInt &DemandedElts,

673APInt &DemandedLHS,

674APInt &DemandedRHS) {

675assert(VectorBitWidth >= 128 &&"Vectors smaller than 128 bit not supported");

676int NumLanes = VectorBitWidth / 128;

677int NumElts = DemandedElts.getBitWidth();

678int NumEltsPerLane = NumElts / NumLanes;

679int HalfEltsPerLane = NumEltsPerLane / 2;

680

681 DemandedLHS =APInt::getZero(NumElts);

682 DemandedRHS =APInt::getZero(NumElts);

683

684// Map DemandedElts to the horizontal operands.

685for (intIdx = 0;Idx != NumElts; ++Idx) {

686if (!DemandedElts[Idx])

687continue;

688int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;

689int LocalIdx =Idx % NumEltsPerLane;

690if (LocalIdx < HalfEltsPerLane) {

691 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);

692 }else {

693 LocalIdx -= HalfEltsPerLane;

694 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);

695 }

696 }

697}

698

699MapVector<Instruction *, uint64_t>

700llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *>Blocks,DemandedBits &DB,

701constTargetTransformInfo *TTI) {

702

703// DemandedBits will give us every value's live-out bits. But we want

704// to ensure no extra casts would need to be inserted, so every DAG

705// of connected values must have the same minimum bitwidth.

706EquivalenceClasses<Value *> ECs;

707SmallVector<Value *, 16> Worklist;

708SmallPtrSet<Value *, 4> Roots;

709SmallPtrSet<Value *, 16> Visited;

710DenseMap<Value *, uint64_t> DBits;

711SmallPtrSet<Instruction *, 4> InstructionSet;

712MapVector<Instruction *, uint64_t> MinBWs;

713

714// Determine the roots. We work bottom-up, from truncs or icmps.

715bool SeenExtFromIllegalType =false;

716for (auto *BB :Blocks)

717for (auto &I : *BB) {

718 InstructionSet.insert(&I);

719

720if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&

721 !TTI->isTypeLegal(I.getOperand(0)->getType()))

722 SeenExtFromIllegalType =true;

723

724// Only deal with non-vector integers up to 64-bits wide.

725if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&

726 !I.getType()->isVectorTy() &&

727I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {

728// Don't make work for ourselves. If we know the loaded type is legal,

729// don't add it to the worklist.

730if (TTI && isa<TruncInst>(&I) &&TTI->isTypeLegal(I.getType()))

731continue;

732

733 Worklist.push_back(&I);

734 Roots.insert(&I);

735 }

736 }

737// Early exit.

738if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))

739return MinBWs;

740

741// Now proceed breadth-first, unioning values together.

742while (!Worklist.empty()) {

743Value *Val = Worklist.pop_back_val();

744Value *Leader = ECs.getOrInsertLeaderValue(Val);

745

746if (!Visited.insert(Val).second)

747continue;

748

749// Non-instructions terminate a chain successfully.

750if (!isa<Instruction>(Val))

751continue;

752Instruction *I = cast<Instruction>(Val);

753

754// If we encounter a type that is larger than 64 bits, we can't represent

755// it so bail out.

756if (DB.getDemandedBits(I).getBitWidth() > 64)

757returnMapVector<Instruction *, uint64_t>();

758

759uint64_t V = DB.getDemandedBits(I).getZExtValue();

760 DBits[Leader] |= V;

761 DBits[I] = V;

762

763// Casts, loads and instructions outside of our range terminate a chain

764// successfully.

765if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||

766 !InstructionSet.count(I))

767continue;

768

769// Unsafe casts terminate a chain unsuccessfully. We can't do anything

770// useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to

771// transform anything that relies on them.

772if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||

773 !I->getType()->isIntegerTy()) {

774 DBits[Leader] |= ~0ULL;

775continue;

776 }

777

778// We don't modify the types of PHIs. Reductions will already have been

779// truncated if possible, and inductions' sizes will have been chosen by

780// indvars.

781if (isa<PHINode>(I))

782continue;

783

784if (DBits[Leader] == ~0ULL)

785// All bits demanded, no point continuing.

786continue;

787

788for (Value *O : cast<User>(I)->operands()) {

789 ECs.unionSets(Leader, O);

790 Worklist.push_back(O);

791 }

792 }

793

794// Now we've discovered all values, walk them to see if there are

795// any users we didn't see. If there are, we can't optimize that

796// chain.

797for (auto &I : DBits)

798for (auto *U :I.first->users())

799if (U->getType()->isIntegerTy() && DBits.count(U) == 0)

800 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

801

802for (autoI = ECs.begin(), E = ECs.end();I != E; ++I) {

803uint64_t LeaderDemandedBits = 0;

804for (Value *M :llvm::make_range(ECs.member_begin(I), ECs.member_end()))

805 LeaderDemandedBits |= DBits[M];

806

807uint64_t MinBW =llvm::bit_width(LeaderDemandedBits);

808// Round up to a power of 2

809 MinBW =llvm::bit_ceil(MinBW);

810

811// We don't modify the types of PHIs. Reductions will already have been

812// truncated if possible, and inductions' sizes will have been chosen by

813// indvars.

814// If we are required to shrink a PHI, abandon this entire equivalence class.

815bool Abort =false;

816for (Value *M :llvm::make_range(ECs.member_begin(I), ECs.member_end()))

817if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {

818 Abort =true;

819break;

820 }

821if (Abort)

822continue;

823

824for (Value *M :llvm::make_range(ECs.member_begin(I), ECs.member_end())) {

825auto *MI = dyn_cast<Instruction>(M);

826if (!MI)

827continue;

828Type *Ty = M->getType();

829if (Roots.count(M))

830 Ty =MI->getOperand(0)->getType();

831

832if (MinBW >= Ty->getScalarSizeInBits())

833continue;

834

835// If any of M's operands demand more bits than MinBW then M cannot be

836// performed safely in MinBW.

837if (any_of(MI->operands(), [&DB, MinBW](Use &U) {

838 auto *CI = dyn_cast<ConstantInt>(U);

839// For constants shift amounts, check if the shift would result in

840// poison.

841 if (CI &&

842 isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&

843 U.getOperandNo() == 1)

844 return CI->uge(MinBW);

845 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());

846 return bit_ceil(BW) > MinBW;

847 }))

848continue;

849

850 MinBWs[MI] = MinBW;

851 }

852 }

853

854return MinBWs;

855}

856

857/// Add all access groups in @p AccGroups to @p List.

858template <typename ListT>

859staticvoidaddToAccessGroupList(ListT &List,MDNode *AccGroups) {

860// Interpret an access group as a list containing itself.

861if (AccGroups->getNumOperands() == 0) {

862assert(isValidAsAccessGroup(AccGroups) &&"Node must be an access group");

863List.insert(AccGroups);

864return;

865 }

866

867for (constauto &AccGroupListOp : AccGroups->operands()) {

868auto *Item = cast<MDNode>(AccGroupListOp.get());

869assert(isValidAsAccessGroup(Item) &&"List item must be an access group");

870List.insert(Item);

871 }

872}

873

874MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1,MDNode *AccGroups2) {

875if (!AccGroups1)

876return AccGroups2;

877if (!AccGroups2)

878return AccGroups1;

879if (AccGroups1 == AccGroups2)

880return AccGroups1;

881

882SmallSetVector<Metadata *, 4> Union;

883addToAccessGroupList(Union, AccGroups1);

884addToAccessGroupList(Union, AccGroups2);

885

886if (Union.size() == 0)

887returnnullptr;

888if (Union.size() == 1)

889return cast<MDNode>(Union.front());

890

891LLVMContext &Ctx = AccGroups1->getContext();

892returnMDNode::get(Ctx, Union.getArrayRef());

893}

894

895MDNode *llvm::intersectAccessGroups(constInstruction *Inst1,

896constInstruction *Inst2) {

897bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();

898bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();

899

900if (!MayAccessMem1 && !MayAccessMem2)

901returnnullptr;

902if (!MayAccessMem1)

903return Inst2->getMetadata(LLVMContext::MD_access_group);

904if (!MayAccessMem2)

905return Inst1->getMetadata(LLVMContext::MD_access_group);

906

907MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);

908MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);

909if (!MD1 || !MD2)

910returnnullptr;

911if (MD1 == MD2)

912return MD1;

913

914// Use set for scalable 'contains' check.

915SmallPtrSet<Metadata *, 4> AccGroupSet2;

916addToAccessGroupList(AccGroupSet2, MD2);

917

918SmallVector<Metadata *, 4> Intersection;

919if (MD1->getNumOperands() == 0) {

920assert(isValidAsAccessGroup(MD1) &&"Node must be an access group");

921if (AccGroupSet2.count(MD1))

922 Intersection.push_back(MD1);

923 }else {

924for (constMDOperand &Node : MD1->operands()) {

925auto *Item = cast<MDNode>(Node.get());

926assert(isValidAsAccessGroup(Item) &&"List item must be an access group");

927if (AccGroupSet2.count(Item))

928 Intersection.push_back(Item);

929 }

930 }

931

932if (Intersection.size() == 0)

933returnnullptr;

934if (Intersection.size() == 1)

935return cast<MDNode>(Intersection.front());

936

937LLVMContext &Ctx = Inst1->getContext();

938returnMDNode::get(Ctx, Intersection);

939}

940

941/// \returns \p I after propagating metadata from \p VL.

942Instruction *llvm::propagateMetadata(Instruction *Inst,ArrayRef<Value *> VL) {

943if (VL.empty())

944return Inst;

945Instruction *I0 = cast<Instruction>(VL[0]);

946SmallVector<std::pair<unsigned, MDNode *>, 4>Metadata;

947 I0->getAllMetadataOtherThanDebugLoc(Metadata);

948

949for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,

950 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,

951 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,

952 LLVMContext::MD_access_group, LLVMContext::MD_mmra}) {

953MDNode *MD = I0->getMetadata(Kind);

954for (int J = 1, E = VL.size(); MD && J != E; ++J) {

955constInstruction *IJ = cast<Instruction>(VL[J]);

956MDNode *IMD = IJ->getMetadata(Kind);

957

958switch (Kind) {

959case LLVMContext::MD_mmra: {

960 MD =MMRAMetadata::combine(Inst->getContext(), MD, IMD);

961break;

962 }

963case LLVMContext::MD_tbaa:

964 MD =MDNode::getMostGenericTBAA(MD, IMD);

965break;

966case LLVMContext::MD_alias_scope:

967 MD =MDNode::getMostGenericAliasScope(MD, IMD);

968break;

969case LLVMContext::MD_fpmath:

970 MD =MDNode::getMostGenericFPMath(MD, IMD);

971break;

972case LLVMContext::MD_noalias:

973case LLVMContext::MD_nontemporal:

974case LLVMContext::MD_invariant_load:

975 MD =MDNode::intersect(MD, IMD);

976break;

977case LLVMContext::MD_access_group:

978 MD =intersectAccessGroups(Inst, IJ);

979break;

980default:

981llvm_unreachable("unhandled metadata");

982 }

983 }

984

985 Inst->setMetadata(Kind, MD);

986 }

987

988return Inst;

989}

990

991Constant *

992llvm::createBitMaskForGaps(IRBuilderBase &Builder,unsigned VF,

993constInterleaveGroup<Instruction> &Group) {

994// All 1's means mask is not needed.

995if (Group.getNumMembers() == Group.getFactor())

996returnnullptr;

997

998// TODO: support reversed access.

999assert(!Group.isReverse() &&"Reversed group not supported.");

1000

1001SmallVector<Constant *, 16> Mask;

1002for (unsigned i = 0; i < VF; i++)

1003for (unsigned j = 0; j < Group.getFactor(); ++j) {

1004unsigned HasMember = Group.getMember(j) ? 1 : 0;

1005 Mask.push_back(Builder.getInt1(HasMember));

1006 }

1007

1008returnConstantVector::get(Mask);

1009}

1010

1011llvm::SmallVector<int, 16>

1012llvm::createReplicatedMask(unsigned ReplicationFactor,unsigned VF) {

1013SmallVector<int, 16> MaskVec;

1014for (unsigned i = 0; i < VF; i++)

1015for (unsigned j = 0; j < ReplicationFactor; j++)

1016 MaskVec.push_back(i);

1017

1018return MaskVec;

1019}

1020

1021llvm::SmallVector<int, 16>llvm::createInterleaveMask(unsigned VF,

1022unsigned NumVecs) {

1023SmallVector<int, 16> Mask;

1024for (unsigned i = 0; i < VF; i++)

1025for (unsigned j = 0; j < NumVecs; j++)

1026 Mask.push_back(j * VF + i);

1027

1028return Mask;

1029}

1030

1031llvm::SmallVector<int, 16>

1032llvm::createStrideMask(unsigned Start,unsigned Stride,unsigned VF) {

1033SmallVector<int, 16> Mask;

1034for (unsigned i = 0; i < VF; i++)

1035 Mask.push_back(Start + i * Stride);

1036

1037return Mask;

1038}

1039

1040llvm::SmallVector<int, 16>llvm::createSequentialMask(unsigned Start,

1041unsigned NumInts,

1042unsigned NumUndefs) {

1043SmallVector<int, 16> Mask;

1044for (unsigned i = 0; i < NumInts; i++)

1045 Mask.push_back(Start + i);

1046

1047for (unsigned i = 0; i < NumUndefs; i++)

1048 Mask.push_back(-1);

1049

1050return Mask;

1051}

1052

1053llvm::SmallVector<int, 16>llvm::createUnaryMask(ArrayRef<int> Mask,

1054unsigned NumElts) {

1055// Avoid casts in the loop and make sure we have a reasonable number.

1056int NumEltsSigned = NumElts;

1057assert(NumEltsSigned > 0 &&"Expected smaller or non-zero element count");

1058

1059// If the mask chooses an element from operand 1, reduce it to choose from the

1060// corresponding element of operand 0. Undef mask elements are unchanged.

1061SmallVector<int, 16> UnaryMask;

1062for (int MaskElt : Mask) {

1063assert((MaskElt < NumEltsSigned * 2) &&"Expected valid shuffle mask");

1064int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;

1065 UnaryMask.push_back(UnaryElt);

1066 }

1067return UnaryMask;

1068}

1069

1070/// A helper function for concatenating vectors. This function concatenates two

1071/// vectors having the same element type. If the second vector has fewer

1072/// elements than the first, it is padded with undefs.

1073staticValue *concatenateTwoVectors(IRBuilderBase &Builder,Value *V1,

1074Value *V2) {

1075VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());

1076VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());

1077assert(VecTy1 && VecTy2 &&

1078 VecTy1->getScalarType() == VecTy2->getScalarType() &&

1079"Expect two vectors with the same element type");

1080

1081unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();

1082unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();

1083assert(NumElts1 >= NumElts2 &&"Unexpect the first vector has less elements");

1084

1085if (NumElts1 > NumElts2) {

1086// Extend with UNDEFs.

1087 V2 = Builder.CreateShuffleVector(

1088 V2,createSequentialMask(0, NumElts2, NumElts1 - NumElts2));

1089 }

1090

1091return Builder.CreateShuffleVector(

1092 V1, V2,createSequentialMask(0, NumElts1 + NumElts2, 0));

1093}

1094

1095Value *llvm::concatenateVectors(IRBuilderBase &Builder,

1096ArrayRef<Value *> Vecs) {

1097unsigned NumVecs = Vecs.size();

1098assert(NumVecs > 1 &&"Should be at least two vectors");

1099

1100SmallVector<Value *, 8> ResList;

1101 ResList.append(Vecs.begin(), Vecs.end());

1102do {

1103SmallVector<Value *, 8> TmpList;

1104for (unsigned i = 0; i < NumVecs - 1; i += 2) {

1105Value *V0 = ResList[i], *V1 = ResList[i + 1];

1106assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&

1107"Only the last vector may have a different type");

1108

1109 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));

1110 }

1111

1112// Push the last vector if the total number of vectors is odd.

1113if (NumVecs % 2 != 0)

1114 TmpList.push_back(ResList[NumVecs - 1]);

1115

1116 ResList = TmpList;

1117 NumVecs = ResList.size();

1118 }while (NumVecs > 1);

1119

1120return ResList[0];

1121}

1122

1123boolllvm::maskIsAllZeroOrUndef(Value *Mask) {

1124assert(isa<VectorType>(Mask->getType()) &&

1125 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1126 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1127 1 &&

1128"Mask must be a vector of i1");

1129

1130auto *ConstMask = dyn_cast<Constant>(Mask);

1131if (!ConstMask)

1132returnfalse;

1133if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))

1134returntrue;

1135if (isa<ScalableVectorType>(ConstMask->getType()))

1136returnfalse;

1137for (unsigned

1138I = 0,

1139 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();

1140I != E; ++I) {

1141if (auto *MaskElt = ConstMask->getAggregateElement(I))

1142if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))

1143continue;

1144returnfalse;

1145 }

1146returntrue;

1147}

1148

1149boolllvm::maskIsAllOneOrUndef(Value *Mask) {

1150assert(isa<VectorType>(Mask->getType()) &&

1151 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1152 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1153 1 &&

1154"Mask must be a vector of i1");

1155

1156auto *ConstMask = dyn_cast<Constant>(Mask);

1157if (!ConstMask)

1158returnfalse;

1159if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))

1160returntrue;

1161if (isa<ScalableVectorType>(ConstMask->getType()))

1162returnfalse;

1163for (unsigned

1164I = 0,

1165 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();

1166I != E; ++I) {

1167if (auto *MaskElt = ConstMask->getAggregateElement(I))

1168if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))

1169continue;

1170returnfalse;

1171 }

1172returntrue;

1173}

1174

1175boolllvm::maskContainsAllOneOrUndef(Value *Mask) {

1176assert(isa<VectorType>(Mask->getType()) &&

1177 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1178 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1179 1 &&

1180"Mask must be a vector of i1");

1181

1182auto *ConstMask = dyn_cast<Constant>(Mask);

1183if (!ConstMask)

1184returnfalse;

1185if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))

1186returntrue;

1187if (isa<ScalableVectorType>(ConstMask->getType()))

1188returnfalse;

1189for (unsigned

1190I = 0,

1191 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();

1192I != E; ++I) {

1193if (auto *MaskElt = ConstMask->getAggregateElement(I))

1194if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))

1195returntrue;

1196 }

1197returnfalse;

1198}

1199

1200/// TODO: This is a lot like known bits, but for

1201/// vectors. Is there something we can common this with?

1202APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {

1203assert(isa<FixedVectorType>(Mask->getType()) &&

1204 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1205 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1206 1 &&

1207"Mask must be a fixed width vector of i1");

1208

1209constunsigned VWidth =

1210 cast<FixedVectorType>(Mask->getType())->getNumElements();

1211APInt DemandedElts =APInt::getAllOnes(VWidth);

1212if (auto *CV = dyn_cast<ConstantVector>(Mask))

1213for (unsigned i = 0; i < VWidth; i++)

1214if (CV->getAggregateElement(i)->isNullValue())

1215 DemandedElts.clearBit(i);

1216return DemandedElts;

1217}

1218

1219bool InterleavedAccessInfo::isStrided(int Stride) {

1220unsigned Factor = std::abs(Stride);

1221return Factor >= 2 && Factor <=MaxInterleaveGroupFactor;

1222}

1223

1224void InterleavedAccessInfo::collectConstStrideAccesses(

1225MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,

1226constDenseMap<Value*, const SCEV*> &Strides) {

1227auto &DL = TheLoop->getHeader()->getDataLayout();

1228

1229// Since it's desired that the load/store instructions be maintained in

1230// "program order" for the interleaved access analysis, we have to visit the

1231// blocks in the loop in reverse postorder (i.e., in a topological order).

1232// Such an ordering will ensure that any load/store that may be executed

1233// before a second load/store will precede the second load/store in

1234// AccessStrideInfo.

1235LoopBlocksDFS DFS(TheLoop);

1236 DFS.perform(LI);

1237for (BasicBlock *BB :make_range(DFS.beginRPO(), DFS.endRPO()))

1238for (auto &I : *BB) {

1239Value *Ptr =getLoadStorePointerOperand(&I);

1240if (!Ptr)

1241continue;

1242Type *ElementTy =getLoadStoreType(&I);

1243

1244// Currently, codegen doesn't support cases where the type size doesn't

1245// match the alloc size. Skip them for now.

1246uint64_t Size =DL.getTypeAllocSize(ElementTy);

1247if (Size * 8 !=DL.getTypeSizeInBits(ElementTy))

1248continue;

1249

1250// We don't check wrapping here because we don't know yet if Ptr will be

1251// part of a full group or a group with gaps. Checking wrapping for all

1252// pointers (even those that end up in groups with no gaps) will be overly

1253// conservative. For full groups, wrapping should be ok since if we would

1254// wrap around the address space we would do a memory access at nullptr

1255// even without the transformation. The wrapping checks are therefore

1256// deferred until after we've formed the interleaved groups.

1257 int64_t Stride =

1258getPtrStride(PSE, ElementTy,Ptr, TheLoop, Strides,

1259/*Assume=*/true,/*ShouldCheckWrap=*/false).value_or(0);

1260

1261constSCEV *Scev =replaceSymbolicStrideSCEV(PSE, Strides,Ptr);

1262 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev,Size,

1263getLoadStoreAlignment(&I));

1264 }

1265}

1266

// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];                      // (1)
//                                A[i] = b;                      // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;                      // (1)
//                                A[i] = b;                      // (2)
//                                A[i + 1] = c;                  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
//
// EnablePredicatedInterleavedMemAccesses controls two things below: whether
// accesses in predicated blocks may start or join a group, and whether store
// groups with gaps are kept (they are released when it is false).
//
// After the groups are formed, groups with gaps are revisited: a group is
// released if its first or last member may wrap around the address space, and
// a load group with no member at index Factor - 1 is either released (reverse
// access) or causes RequiresScalarEpilogue to be set.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const auto &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
  // Groups added to this set cannot have new members added.
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup<Instruction> *GroupB = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
      GroupB = getInterleaveGroup(B);
      if (!GroupB) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);
        else
          LoadGroups.insert(GroupB);
      }
    }

    // Walk every access A that precedes B (AI starts one past BI in the
    // reversed sequence).
    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //                      A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      //
      // DependentMember returns the first member of Group that cannot be
      // reordered with the access A, or nullptr if every member can.
      auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
                                 StrideEntry *A) -> Instruction * {
        for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
          Instruction *MemberOfGroupB = Group->getMember(Index);
          if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                    A, &*AccessStrideInfo.find(MemberOfGroupB)))
            return MemberOfGroupB;
        }
        return nullptr;
      };

      auto GroupA = getInterleaveGroup(A);
      // If A is a load, dependencies are tolerable, there's nothing to do here.
      // If both A and B belong to the same (store) group, they are independent,
      // even if dependencies have not been recorded.
      // If both GroupA and GroupB are null, there's nothing to do here.
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        Instruction *DependentInst = nullptr;
        // If GroupB is a load group, we have to compare AI against all
        // members of GroupB because if any load within GroupB has a dependency
        // on AI, we need to mark GroupB as complete and also release the
        // store GroupA (if A belongs to one). The former prevents incorrect
        // hoisting of load B above store A while the latter prevents incorrect
        // sinking of store A below load B.
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
          DependentInst = B;

        if (DependentInst) {
          // A has a store dependence on B (or on some load within GroupB) and
          // is part of a store group. Release A's group to prevent illegal
          // sinking of A below B. A will then be free to form another group
          // with instructions that precede it.
          if (GroupA && StoreGroups.contains(GroupA)) {
            LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
                                 "dependence between "
                              << *A << " and " << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          }
          // If B is a load and part of an interleave group, no earlier loads
          // can be added to B's interleave group, because this would mean the
          // DependentInst would move across store A. Mark the interleave group
          // as complete.
          if (GroupB && LoadGroups.contains(GroupB)) {
            LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
          }
        }
      }
      if (CompletedLoadGroups.contains(GroupB)) {
        // Skip trying to add A to B, continue to look for other conflicting A's
        // in groups to be released.
        continue;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory object of A and B don't belong to the same
      // address space
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same predicate,
      // and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      //
      // GroupB is expected to be non-null here: B passed the isStrided check
      // above, and a predicated B without
      // EnablePredicatedInterleavedMemAccesses was skipped by the predicate
      // check just above, so the group-initialization branch at the top of the
      // outer loop was taken (assuming createInterleaveGroup never returns
      // null -- TODO confirm).
      int IndexA =
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << " into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = GroupB;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          GroupB->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Re-run getPtrStride with wrap checking on the group member at Index. If
  // it yields a non-zero stride the group is kept and false is returned;
  // otherwise the group is released and true is returned. FirstOrLast is only
  // used in the debug message.
  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
                                            int Index,
                                            const char *FirstOrLast) -> bool {
    Instruction *Member = Group->getMember(Index);
    assert(Member && "Group member does not exist");
    Value *MemberPtr = getLoadStorePointerOperand(Member);
    Type *AccessTy = getLoadStoreType(Member);
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
      return false;
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
    return true;
  };

  // Remove interleaved groups with gaps whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. Can Skip the checks; For full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    if (Group->getMember(Group->getFactor() - 1))
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
    else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }

  for (auto *Group : StoreGroups) {
    // Case 1: A full group. Can Skip the checks; For full groups, if the wide
    // store would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Interleave-store-group with gaps is implemented using masked wide store.
    // Remove interleaved store groups with gaps if
    // masked-interleaved-accesses are not enabled by the target.
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
      continue;
    }

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and the last group member. Case 3 (scalar epilog) is not relevant for
    // stores with gaps, which are implemented with masked-store (rather than
    // speculative access, as in loads).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    // Scan downwards from Factor - 1 for the highest index that has a member
    // and check only that one.
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      if (Group->getMember(Index)) {
        InvalidateGroupIfMemberMayWrap(Group, Index, "last");
        break;
      }
  }
}

1605

1606voidInterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {

1607// If no group had triggered the requirement to create an epilogue loop,

1608// there is nothing to do.

1609if (!requiresScalarEpilogue())

1610return;

1611

1612// Release groups requiring scalar epilogues. Note that this also removes them

1613// from InterleaveGroups.

1614bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {

1615if (!Group->requiresScalarEpilogue())

1616returnfalse;

1617LLVM_DEBUG(

1618dbgs()

1619 <<"LV: Invalidate candidate interleaved group due to gaps that "

1620"require a scalar epilogue (not allowed under optsize) and cannot "

1621"be masked (not enabled). \n");

1622 releaseGroupWithoutRemovingFromSet(Group);

1623returntrue;

1624 });

1625assert(ReleasedGroup &&"At least one group must be invalidated, as a "

1626"scalar epilogue was required");

1627 (void)ReleasedGroup;

1628 RequiresScalarEpilogue =false;

1629}

1630

/// Generic definition of InterleaveGroup<InstT>::addMetadata. It does no work
/// and is marked unreachable: per its message, addMetadata is only meant to be
/// used when InstT is Instruction.
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}

1635

1636namespacellvm {

1637template <>

1638voidInterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const{

1639SmallVector<Value *, 4> VL;

1640 std::transform(Members.begin(), Members.end(), std::back_inserter(VL),

1641 [](std::pair<int, Instruction *> p) { return p.second; });

1642propagateMetadata(NewInst, VL);

1643}

1644}// namespace llvm

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Definition:ARMSLSHardening.cpp:73

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

CommandLine.h

Constants.h

This file contains the declarations for the subclasses of Constant, which represent the different fla...

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

LLVM_DEBUG

#define LLVM_DEBUG(...)

Definition:Debug.h:106

DemandedBits.h

DerivedTypes.h

Size

uint64_t Size

Definition:ELFObjHandler.cpp:81

Blocks

DenseMap< Block *, BlockRelaxAux > Blocks

Definition:ELF_riscv.cpp:507

EquivalenceClasses.h

Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

IRBuilder.h

IRTranslator LLVM IR MI

Definition:IRTranslator.cpp:112

Value.h

LoopInfo.h

LoopIterator.h

#define I(x, y, z)

Definition:MD5.cpp:58

MemoryModelRelaxationAnnotations.h

This file provides utility for Memory Model Relaxation Annotations (MMRAs).

static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

PatternMatch.h

List

const NodeList & List

Definition:RDFGraph.cpp:200

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

ScalarEvolutionExpressions.h

ScalarEvolution.h

SmallVector.h

This file defines the SmallVector class.

getScalarSizeInBits

static unsigned getScalarSizeInBits(Type *Ty)

Definition:SystemZTargetTransformInfo.cpp:510

getType

static SymbolRef::Type getType(const Symbol *Sym)

Definition:TapiFile.cpp:39

Ptr

@ Ptr

Definition:TargetLibraryInfo.cpp:77

TargetTransformInfo.h

This pass exposes codegen information to IR-level passes.

ValueTracking.h

concatenateTwoVectors

static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)

A helper function for concatenating vectors.

Definition:VectorUtils.cpp:1073

MaxInterleaveGroupFactor

static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))

Maximum factor for an interleaved memory access.

addToAccessGroupList

static void addToAccessGroupList(ListT &List, MDNode *AccGroups)

Add all access groups in AccGroups to List.

Definition:VectorUtils.cpp:859

VectorUtils.h

VectorType

Definition:ItaniumDemangle.h:1173

llvm::APInt

Class for arbitrary precision integers.

Definition:APInt.h:78

llvm::APInt::getAllOnes

static APInt getAllOnes(unsigned numBits)

Return an APInt of a specified width with all bits set.

Definition:APInt.h:234

llvm::APInt::clearBit

void clearBit(unsigned BitPosition)

Set a given bit to 0.

Definition:APInt.h:1407

llvm::APInt::setBit

void setBit(unsigned BitPosition)

Set the given bit to 1 whose position is given as "bitPosition".

Definition:APInt.h:1330

llvm::APInt::isZero

bool isZero() const

Determine if this value is zero, i.e. all bits are clear.

Definition:APInt.h:380

llvm::APInt::getBitWidth

unsigned getBitWidth() const

Return the number of bits in the APInt.

Definition:APInt.h:1468

llvm::APInt::getZero

static APInt getZero(unsigned numBits)

Get the '0' value for the specified bit-width.

Definition:APInt.h:200

llvm::APInt::getSExtValue

int64_t getSExtValue() const

Get sign extended value.

Definition:APInt.h:1542

llvm::ArrayRef

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Definition:ArrayRef.h:41

llvm::ArrayRef::front

const T & front() const

front - Get the first element.

Definition:ArrayRef.h:171

llvm::ArrayRef::end

iterator end() const

Definition:ArrayRef.h:157

llvm::ArrayRef::size

size_t size() const

size - Get the array size.

Definition:ArrayRef.h:168

llvm::ArrayRef::begin

iterator begin() const

Definition:ArrayRef.h:156

llvm::ArrayRef::empty

bool empty() const

empty - Check if the array is empty.

Definition:ArrayRef.h:163

llvm::BasicBlock

LLVM Basic Block Representation.

Definition:BasicBlock.h:61

llvm::BasicBlock::getDataLayout

const DataLayout & getDataLayout() const

Get the data layout of the module this basic block belongs to.

Definition:BasicBlock.cpp:296

llvm::CallInst

This class represents a function call, abstracting a target machine's calling convention.

Definition:Instructions.h:1479

llvm::ConstantVector::get

static Constant * get(ArrayRef< Constant * > V)

Definition:Constants.cpp:1421

llvm::Constant

This is an important base class in LLVM.

Definition:Constant.h:42

llvm::DemandedBits

Definition:DemandedBits.h:40

llvm::DenseMapBase::count

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

Definition:DenseMap.h:152

llvm::DenseMap

Definition:DenseMap.h:727

llvm::EquivalenceClasses

EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...

Definition:EquivalenceClasses.h:60

llvm::EquivalenceClasses::begin

iterator begin() const

Definition:EquivalenceClasses.h:167

llvm::EquivalenceClasses::getOrInsertLeaderValue

const ElemTy & getOrInsertLeaderValue(const ElemTy &V)

getOrInsertLeaderValue - Return the leader for the specified value that is in the set.

Definition:EquivalenceClasses.h:200

llvm::EquivalenceClasses::member_end

member_iterator member_end() const

Definition:EquivalenceClasses.h:178

llvm::EquivalenceClasses::end

iterator end() const

Definition:EquivalenceClasses.h:168

llvm::EquivalenceClasses::member_begin

member_iterator member_begin(iterator I) const

Definition:EquivalenceClasses.h:174

llvm::EquivalenceClasses::unionSets

member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)

union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...

Definition:EquivalenceClasses.h:238

llvm::IRBuilderBase

Common base class shared among various IRBuilders.

Definition:IRBuilder.h:113

llvm::IRBuilderBase::getInt1

ConstantInt * getInt1(bool V)

Get a constant value representing either true or false.

Definition:IRBuilder.h:480

llvm::IRBuilderBase::CreateShuffleVector

Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")

Definition:IRBuilder.h:2533

llvm::InsertElementInst

This instruction inserts a single (scalar) element into a VectorType value.

Definition:Instructions.h:1834

llvm::Instruction

Definition:Instruction.h:68

llvm::Instruction::mayReadOrWriteMemory

bool mayReadOrWriteMemory() const

Return true if this instruction may read or write memory.

Definition:Instruction.h:780

llvm::Instruction::getMetadata

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

Definition:Instruction.h:407

llvm::Instruction::setMetadata

void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

Definition:Metadata.cpp:1679

llvm::Instruction::getAllMetadataOtherThanDebugLoc

void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const

This does the same thing as getAllMetadata, except that it filters out the debug location.

Definition:Instruction.h:432

llvm::InterleaveGroup

The group of interleaved loads/stores sharing the same stride and close to each other.

Definition:VectorUtils.h:488

llvm::InterleaveGroup::getFactor

uint32_t getFactor() const

Definition:VectorUtils.h:504

llvm::InterleaveGroup::getMember

InstTy * getMember(uint32_t Index) const

Get the member with the given index Index.

Definition:VectorUtils.h:558

llvm::InterleaveGroup::getIndex

uint32_t getIndex(const InstTy *Instr) const

Get the index for the given member.

Definition:VectorUtils.h:565

llvm::InterleaveGroup::setInsertPos

void setInsertPos(InstTy *Inst)

Definition:VectorUtils.h:575

llvm::InterleaveGroup::isReverse

bool isReverse() const

Definition:VectorUtils.h:503

llvm::InterleaveGroup::addMetadata

void addMetadata(InstTy *NewInst) const

Add metadata (e.g. alias info) from the instructions in this group to NewInst.

Definition:VectorUtils.cpp:1632

llvm::InterleaveGroup::insertMember

bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)

Try to insert a new member Instr with index Index and alignment NewAlign.

Definition:VectorUtils.h:513

llvm::InterleaveGroup::getNumMembers

uint32_t getNumMembers() const

Definition:VectorUtils.h:506

llvm::InterleavedAccessInfo::getInterleaveGroup

InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const

Get the interleave group that Instr belongs to.

Definition:VectorUtils.h:675

llvm::InterleavedAccessInfo::requiresScalarEpilogue

bool requiresScalarEpilogue() const

Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...

Definition:VectorUtils.h:686

llvm::InterleavedAccessInfo::isInterleaved

bool isInterleaved(Instruction *Instr) const

Check if Instr belongs to any interleave group.

Definition:VectorUtils.h:667

llvm::InterleavedAccessInfo::analyzeInterleaving

void analyzeInterleaving(bool EnableMaskedInterleavedGroup)

Analyze the interleaved accesses and collect them in interleave groups.

Definition:VectorUtils.cpp:1303

llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue

void invalidateGroupsRequiringScalarEpilogue()

Invalidate groups that require a scalar epilogue (due to gaps).

Definition:VectorUtils.cpp:1606

llvm::LLVMContext

This is an important class for using LLVM in a threaded context.

Definition:LLVMContext.h:67

llvm::LoopAccessInfo::getSymbolicStrides

const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const

If an access has symbolic strides, this maps the pointer value to the stride symbol.

Definition:LoopAccessAnalysis.h:693

llvm::LoopBase::getHeader

BlockT * getHeader() const

Definition:GenericLoopInfo.h:90

llvm::LoopBlocksDFS

Store the result of a depth first search within basic blocks contained by a single loop.

Definition:LoopIterator.h:97

llvm::MDNode

Metadata node.

Definition:Metadata.h:1073

llvm::MDNode::getMostGenericAliasScope

static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)

Definition:Metadata.cpp:1141

llvm::MDNode::getMostGenericTBAA

static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)

Definition:TypeBasedAliasAnalysis.cpp:477

llvm::MDNode::operands

ArrayRef< MDOperand > operands() const

Definition:Metadata.h:1432

llvm::MDNode::get

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

Definition:Metadata.h:1549

llvm::MDNode::getMostGenericFPMath

static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)

Definition:Metadata.cpp:1173

llvm::MDNode::getNumOperands

unsigned getNumOperands() const

Return number of MDNode operands.

Definition:Metadata.h:1440

llvm::MDNode::intersect

static MDNode * intersect(MDNode *A, MDNode *B)

Definition:Metadata.cpp:1128

llvm::MDNode::getContext

LLVMContext & getContext() const

Definition:Metadata.h:1237

llvm::MDOperand

Tracking metadata reference owned by Metadata.

Definition:Metadata.h:895

llvm::MMRAMetadata::combine

static MDNode * combine(LLVMContext &Ctx, const MMRAMetadata &A, const MMRAMetadata &B)

Combines A and B according to MMRA semantics.

Definition:MemoryModelRelaxationAnnotations.cpp:78

llvm::MapVector

This class implements a map that also provides access to all stored values in a deterministic order.

Definition:MapVector.h:36

llvm::MapVector::rend

reverse_iterator rend()

Definition:MapVector.h:76

llvm::MapVector::find

iterator find(const KeyT &Key)

Definition:MapVector.h:167

llvm::MapVector::empty

bool empty() const

Definition:MapVector.h:79

llvm::MapVector::rbegin

reverse_iterator rbegin()

Definition:MapVector.h:74

llvm::Metadata

Root of the metadata hierarchy.

Definition:Metadata.h:62

llvm::MutableArrayRef

MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...

Definition:ArrayRef.h:310

llvm::PoisonValue::get

static PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

Definition:Constants.cpp:1878

llvm::PredicatedScalarEvolution::getSE

ScalarEvolution * getSE() const

Returns the ScalarEvolution analysis used.

Definition:ScalarEvolution.h:2422

llvm::SCEVConstant

This class represents a constant integer value.

Definition:ScalarEvolutionExpressions.h:60

llvm::SCEVConstant::getAPInt

const APInt & getAPInt() const

Definition:ScalarEvolutionExpressions.h:70

llvm::SCEV

This class represents an analyzed expression in the program.

Definition:ScalarEvolution.h:71

llvm::ScalarEvolution::getMinusSCEV

const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

Definition:ScalarEvolution.cpp:4655

llvm::SetVector::remove

bool remove(const value_type &X)

Remove an item from the set vector.

Definition:SetVector.h:188

llvm::SetVector::insert

bool insert(const value_type &X)

Insert a new element into the SetVector.

Definition:SetVector.h:162

llvm::SetVector::contains

bool contains(const key_type &key) const

Check if the SetVector contains the given key.

Definition:SetVector.h:254

llvm::ShuffleVectorInst

This instruction constructs a fixed permutation of two input vectors.

Definition:Instructions.h:1901

llvm::ShuffleVectorInst::getMaskValue

int getMaskValue(unsigned Elt) const

Return the shuffle mask value of this instruction for the given element index.

Definition:Instructions.h:1950

llvm::ShuffleVectorInst::getType

VectorType * getType() const

Overload to return most specific vector type.

Definition:Instructions.h:1941

llvm::SmallPtrSetImpl::count

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

Definition:SmallPtrSet.h:452

llvm::SmallPtrSetImpl::insert

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

Definition:SmallPtrSet.h:384

llvm::SmallPtrSetImpl::contains

bool contains(ConstPtrType Ptr) const

Definition:SmallPtrSet.h:458

llvm::SmallPtrSet

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.

Definition:SmallPtrSet.h:519

llvm::SmallSetVector

A SetVector that performs no allocations if smaller than a certain size.

Definition:SetVector.h:370

llvm::SmallVectorBase::empty

bool empty() const

Definition:SmallVector.h:81

llvm::SmallVectorBase::size

size_t size() const

Definition:SmallVector.h:78

llvm::SmallVectorImpl

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

Definition:SmallVector.h:573

llvm::SmallVectorImpl::pop_back_val

T pop_back_val()

Definition:SmallVector.h:673

llvm::SmallVectorImpl::assign

void assign(size_type NumElts, ValueParamT Elt)

Definition:SmallVector.h:704

llvm::SmallVectorImpl::reserve

void reserve(size_type N)

Definition:SmallVector.h:663

llvm::SmallVectorImpl::append

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

Definition:SmallVector.h:683

llvm::SmallVectorImpl::clear

void clear()

Definition:SmallVector.h:610

llvm::SmallVectorTemplateBase::push_back

void push_back(const T &Elt)

Definition:SmallVector.h:413

llvm::SmallVectorTemplateCommon::front

reference front()

Definition:SmallVector.h:299

llvm::SmallVector

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Definition:SmallVector.h:1196

llvm::TargetLibraryInfo

Provides information about what library functions are available for the current target.

Definition:TargetLibraryInfo.h:280

llvm::TargetTransformInfo

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Definition:TargetTransformInfo.h:212

llvm::TargetTransformInfo::isTypeLegal

bool isTypeLegal(Type *Ty) const

Return true if this type is legal.

Definition:TargetTransformInfo.cpp:583

llvm::TargetTransformInfo::isTargetIntrinsicWithStructReturnOverloadAtField

bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const

Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...

Definition:TargetTransformInfo.cpp:623

llvm::TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable

bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const

Definition:TargetTransformInfo.cpp:608

llvm::TargetTransformInfo::isTargetIntrinsicWithScalarOpAtArg

bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const

Identifies if the vector form of the intrinsic has a scalar operand.

Definition:TargetTransformInfo.cpp:613

llvm::TargetTransformInfo::isTargetIntrinsicWithOverloadTypeAtArg

bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const

Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...

Definition:TargetTransformInfo.cpp:618

llvm::Type

The instances of the Type class are immutable: once they are created, they are never changed.

Definition:Type.h:45

llvm::Type::getScalarSizeInBits

unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

llvm::Use

A Use represents the edge between a Value definition and its users.

Definition:Use.h:43

llvm::User::getOperand

Value * getOperand(unsigned i) const

Definition:User.h:228

llvm::VPCastIntrinsic::isVPCast

static bool isVPCast(Intrinsic::ID ID)

Definition:IntrinsicInst.cpp:726

llvm::Value

LLVM Value Representation.

Definition:Value.h:74

llvm::Value::getType

Type * getType() const

All values are typed, get the type of this value.

Definition:Value.h:255

llvm::Value::getContext

LLVMContext & getContext() const

All values hold a context through their type.

Definition:Value.cpp:1075

llvm::VectorType

Base class of all SIMD vector types.

Definition:DerivedTypes.h:427

llvm::VectorType::getElementType

Type * getElementType() const

Definition:DerivedTypes.h:460

llvm::cl::opt

Definition:CommandLine.h:1423

llvm::function_ref

An efficient, type-erasing, non-owning reference to a callable.

Definition:STLFunctionalExtras.h:37

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Definition:ErrorHandling.h:143

llvm::BitmaskEnumDetail::Mask

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

Definition:BitmaskEnum.h:125

llvm::CallingConv::C

@ C

The default llvm calling convention, compatible with C.

Definition:CallingConv.h:34

llvm::CallingConv::ID

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

Definition:CallingConv.h:24

llvm::Intrinsic::not_intrinsic

@ not_intrinsic

Definition:Intrinsics.h:44

llvm::Intrinsic::isTargetIntrinsic

bool isTargetIntrinsic(ID IID)

isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.

Definition:Intrinsics.cpp:618

llvm::PatternMatch

Definition:PatternMatch.h:47

llvm::PatternMatch::m_Add

BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)

Definition:PatternMatch.h:1102

llvm::PatternMatch::m_BinOp

class_match< BinaryOperator > m_BinOp()

Match an arbitrary binary operation and ignore it.

Definition:PatternMatch.h:100

llvm::PatternMatch::m_Constant

class_match< Constant > m_Constant()

Match an arbitrary Constant and ignore it.

Definition:PatternMatch.h:165

llvm::PatternMatch::match

bool match(Val *V, const Pattern &P)

Definition:PatternMatch.h:49

llvm::PatternMatch::m_Select

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

Definition:PatternMatch.h:1799

llvm::PatternMatch::m_ZeroInt

cst_pred_ty< is_zero_int > m_ZeroInt()

Match an integer 0 or a vector with all elements equal to 0.

Definition:PatternMatch.h:599

llvm::PatternMatch::m_Shuffle

TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)

Matches ShuffleVectorInst independently of mask value.

Definition:PatternMatch.h:1911

llvm::PatternMatch::m_Value

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

Definition:PatternMatch.h:92

llvm::PatternMatch::m_InsertElt

ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)

Matches InsertElementInst.

Definition:PatternMatch.h:1829

llvm::cl::Hidden

@ Hidden

Definition:CommandLine.h:137

llvm::cl::init

initializer< Ty > init(const Ty &Val)

Definition:CommandLine.h:443

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::isTriviallyScalarizable

bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)

Identify if the intrinsic is trivially scalarizable.

Definition:VectorUtils.cpp:116

llvm::all_of

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1739

llvm::getLoadStoreAddressSpace

unsigned getLoadStoreAddressSpace(const Value *I)

A helper function that returns the address space of the pointer operand of load or store instruction.

Definition:Instructions.h:5030

llvm::getVectorIntrinsicIDForCall

Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)

Returns intrinsic ID for call.

Definition:VectorUtils.cpp:209

llvm::possiblyDemandedEltsInMask

APInt possiblyDemandedEltsInMask(Value *Mask)

Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...

Definition:VectorUtils.cpp:1202

llvm::Depth

@ Depth

Definition:SIMachineScheduler.h:36

llvm::getLoadStorePointerOperand

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

Definition:Instructions.h:4984

llvm::createUnaryMask

llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)

Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...

Definition:VectorUtils.cpp:1053

llvm::make_range

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

Definition:iterator_range.h:77

llvm::bit_width

int bit_width(T Value)

Returns the number of bits needed to represent Value if Value is nonzero.

Definition:bit.h:317

llvm::concatenateVectors

Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)

Concatenate a list of vectors.

Definition:VectorUtils.cpp:1095

llvm::getLoadStoreAlignment

Align getLoadStoreAlignment(const Value *I)

A helper function that returns the alignment of load or store instruction.

Definition:Instructions.h:5010

llvm::widenShuffleMaskElts

bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)

Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...

Definition:VectorUtils.cpp:431

llvm::propagateMetadata

Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)

Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...

Definition:VectorUtils.cpp:942

llvm::getSplatValue

Value * getSplatValue(const Value *V)

Get splat value if the input is a splat vector or return nullptr.

Definition:VectorUtils.cpp:312

llvm::bit_ceil

T bit_ceil(T Value)

Returns the smallest integral power of two no smaller than Value if Value is nonzero.

Definition:bit.h:342

llvm::intersectAccessGroups

MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)

Compute the access-group list of access groups that Inst1 and Inst2 are both in.

Definition:VectorUtils.cpp:895

llvm::M1

unsigned M1(unsigned Val)

Definition:VE.h:376

llvm::any_of

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1746

llvm::getShuffleDemandedElts

bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)

Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...

Definition:VectorUtils.cpp:373

llvm::isSplatValue

bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)

Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...

Definition:VectorUtils.cpp:327

llvm::createBitMaskForGaps

Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)

Create a mask that filters the members of an interleave group where there are gaps.

Definition:VectorUtils.cpp:992

llvm::MaxAnalysisRecursionDepth

constexpr unsigned MaxAnalysisRecursionDepth

Definition:ValueTracking.h:44

llvm::createStrideMask

llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)

Create a stride shuffle mask.

Definition:VectorUtils.cpp:1032

llvm::getHorizDemandedEltsForFirstOperand

void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)

Compute the demanded elements mask of horizontal binary operations.

Definition:VectorUtils.cpp:671

llvm::createReplicatedMask

llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)

Create a mask with replicated elements.

Definition:VectorUtils.cpp:1012

llvm::ComplexDeinterleavingOperation::Splat

@ Splat

llvm::dbgs

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

Definition:Debug.cpp:163

llvm::getPtrStride

std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)

If the pointer has a constant stride return it in units of the access type size.

Definition:LoopAccessAnalysis.cpp:1435

llvm::maskIsAllOneOrUndef

bool maskIsAllOneOrUndef(Value *Mask)

Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...

Definition:VectorUtils.cpp:1149

llvm::PoisonMaskElem

constexpr int PoisonMaskElem

Definition:Instructions.h:1889

llvm::isValidAsAccessGroup

bool isValidAsAccessGroup(MDNode *AccGroup)

Return whether an MDNode might represent an access group.

Definition:LoopInfo.cpp:1166

llvm::getIntrinsicForCallSite

Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)

Map a call instruction to an intrinsic ID.

Definition:ValueTracking.cpp:4245

llvm::isVectorIntrinsicWithStructReturnOverloadAtField

bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)

Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...

Definition:VectorUtils.cpp:192

llvm::narrowShuffleMaskElts

void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)

Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...

Definition:VectorUtils.cpp:410

llvm::createInterleaveMask

llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)

Create an interleave shuffle mask.

Definition:VectorUtils.cpp:1021

llvm::isVectorIntrinsicWithScalarOpAtArg

bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)

Identifies if the vector form of the intrinsic has a scalar operand.

Definition:VectorUtils.cpp:134

llvm::replaceSymbolicStrideSCEV

const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)

Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...

Definition:LoopAccessAnalysis.cpp:154

llvm::findScalarElement

Value * findScalarElement(Value *V, unsigned EltNo)

Given a vector and an element number, see if the scalar value is already around as a register,...

Definition:VectorUtils.cpp:226

llvm::uniteAccessGroups

MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)

Compute the union of two access-group lists.

Definition:VectorUtils.cpp:874

llvm::M0

unsigned M0(unsigned Val)

Definition:VE.h:375

llvm::count_if

auto count_if(R &&Range, UnaryPredicate P)

Wrapper function around std::count_if to count the number of times an element satisfying a given pred...

Definition:STLExtras.h:1945

llvm::maskIsAllZeroOrUndef

bool maskIsAllZeroOrUndef(Value *Mask)

Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...

Definition:VectorUtils.cpp:1123

llvm::find_if

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1766

llvm::getShuffleMaskWithWidestElts

void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)

Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...

Definition:VectorUtils.cpp:542

llvm::getLoadStoreType

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

Definition:Instructions.h:5039

llvm::processShuffleMasks

void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)

Splits and processes shuffle mask depending on the number of input and output registers.

Definition:VectorUtils.cpp:556

llvm::all_equal

bool all_equal(std::initializer_list< T > Values)

Returns true if all Values in the initializer list are equal or the list is empty.

Definition:STLExtras.h:2087

llvm::maskContainsAllOneOrUndef

bool maskContainsAllOneOrUndef(Value *Mask)

Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...

Definition:VectorUtils.cpp:1175

llvm::isTriviallyVectorizable

bool isTriviallyVectorizable(Intrinsic::ID ID)

Identify if the intrinsic is trivially vectorizable.

Definition:VectorUtils.cpp:46

llvm::createSequentialMask

llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)

Create a sequential shuffle mask.

Definition:VectorUtils.cpp:1040

llvm::isVectorIntrinsicWithOverloadTypeAtArg

bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)

Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...

Definition:VectorUtils.cpp:162

llvm::computeMinimumValueSizes

MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)

Compute a map of integer instructions to their minimum legal type size.

Definition:VectorUtils.cpp:700

llvm::scaleShuffleMaskElts

bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)

Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.

Definition:VectorUtils.cpp:517

llvm::getSplatIndex

int getSplatIndex(ArrayRef< int > Mask)

If all non-negative Mask elements are the same value, return that value.

Definition:VectorUtils.cpp:290

std::swap

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Definition:BitVector.h:860

llvm::PatternMatch::m_ZeroMask

Definition:PatternMatch.h:1868

llvm::cl::desc

Definition:CommandLine.h:409