Movatterモバイル変換

Go to the documentation of this file.

1//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// This pass merges loads/stores to/from sequential memory addresses into vector

10// loads/stores. Although there's nothing GPU-specific in here, this pass is

11// motivated by the microarchitectural quirks of nVidia and AMD GPUs.

12//

13// (For simplicity below we talk about loads only, but everything also applies

14// to stores.)

15//

16// This pass is intended to be run late in the pipeline, after other

17// vectorization opportunities have been exploited. So the assumption here is

18// that immediately following our new vector load we'll need to extract out the

19// individual elements of the load, so we can operate on them individually.

20//

21// On CPUs this transformation is usually not beneficial, because extracting the

22// elements of a vector register is expensive on most architectures. It's

23// usually better just to load each element individually into its own scalar

24// register.

25//

26// However, nVidia and AMD GPUs don't have proper vector registers. Instead, a

27// "vector load" loads directly into a series of scalar registers. In effect,

28// extracting the elements of the vector is free. It's therefore always

29// beneficial to vectorize a sequence of loads on these architectures.

30//

31// Vectorizing (perhaps a better name might be "coalescing") loads can have

32// large performance impacts on GPU kernels, and opportunities for vectorizing

33// are common in GPU code. This pass tries very hard to find such

34// opportunities; its runtime is quadratic in the number of loads in a BB.

35//

36// Some CPU architectures, such as ARM, have instructions that load into

37// multiple scalar registers, similar to a GPU vectorized load. In theory ARM

38// could use this pass (with some modifications), but currently it implements

39// its own pass to do something similar to what we do here.

40//

41// Overview of the algorithm and terminology in this pass:

42//

43// - Break up each basic block into pseudo-BBs, composed of instructions which

44// are guaranteed to transfer control to their successors.

45// - Within a single pseudo-BB, find all loads, and group them into

46// "equivalence classes" according to getUnderlyingObject() and loaded

47// element size. Do the same for stores.

48// - For each equivalence class, greedily build "chains". Each chain has a

49// leader instruction, and every other member of the chain has a known

50// constant offset from the first instr in the chain.

51// - Break up chains so that they contain only contiguous accesses of legal

52// size with no intervening may-alias instrs.

53// - Convert each chain to vector instructions.

54//

55// The O(n^2) behavior of this pass comes from initially building the chains.

56// In the worst case we have to compare each new instruction to all of those

57// that came before. To limit this, we only calculate the offset to the leaders

58// of the N most recently-used chains.

60#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"

61#include "llvm/ADT/APInt.h"

62#include "llvm/ADT/ArrayRef.h"

63#include "llvm/ADT/DenseMap.h"

64#include "llvm/ADT/MapVector.h"

65#include "llvm/ADT/PostOrderIterator.h"

66#include "llvm/ADT/STLExtras.h"

67#include "llvm/ADT/Sequence.h"

68#include "llvm/ADT/SmallPtrSet.h"

69#include "llvm/ADT/SmallVector.h"

70#include "llvm/ADT/Statistic.h"

71#include "llvm/ADT/iterator_range.h"

72#include "llvm/Analysis/AliasAnalysis.h"

73#include "llvm/Analysis/AssumptionCache.h"

74#include "llvm/Analysis/MemoryLocation.h"

75#include "llvm/Analysis/ScalarEvolution.h"

76#include "llvm/Analysis/TargetTransformInfo.h"

77#include "llvm/Analysis/ValueTracking.h"

78#include "llvm/Analysis/VectorUtils.h"

79#include "llvm/IR/Attributes.h"

80#include "llvm/IR/BasicBlock.h"

81#include "llvm/IR/ConstantRange.h"

82#include "llvm/IR/Constants.h"

83#include "llvm/IR/DataLayout.h"

84#include "llvm/IR/DerivedTypes.h"

85#include "llvm/IR/Dominators.h"

86#include "llvm/IR/Function.h"

87#include "llvm/IR/GetElementPtrTypeIterator.h"

88#include "llvm/IR/IRBuilder.h"

89#include "llvm/IR/InstrTypes.h"

90#include "llvm/IR/Instruction.h"

91#include "llvm/IR/Instructions.h"

92#include "llvm/IR/LLVMContext.h"

93#include "llvm/IR/Module.h"

94#include "llvm/IR/Type.h"

95#include "llvm/IR/Value.h"

96#include "llvm/InitializePasses.h"

97#include "llvm/Pass.h"

98#include "llvm/Support/Alignment.h"

99#include "llvm/Support/Casting.h"

100#include "llvm/Support/Debug.h"

101#include "llvm/Support/KnownBits.h"

102#include "llvm/Support/MathExtras.h"

103#include "llvm/Support/ModRef.h"

104#include "llvm/Support/raw_ostream.h"

105#include "llvm/Transforms/Utils/Local.h"

106#include <algorithm>

107#include <cassert>

108#include <cstdint>

109#include <cstdlib>

110#include <iterator>

111#include <numeric>

112#include <optional>

113#include <tuple>

114#include <type_traits>

115#include <utility>

116#include <vector>

117

118using namespacellvm;

119

120#define DEBUG_TYPE "load-store-vectorizer"

121

122STATISTIC(NumVectorInstructions,"Number of vector accesses generated");

123STATISTIC(NumScalarsVectorized,"Number of scalar accesses vectorized");

124

125namespace{

126

127// Equivalence class key, the initial tuple by which we group loads/stores.

128// Loads/stores with different EqClassKeys are never merged.

129//

130// (We could in theory remove element-size from the this tuple. We'd just need

131// to fix up the vector packing/unpacking code.)

132usingEqClassKey =

133 std::tuple<constValue */* result of getUnderlyingObject() */,

134unsigned/* AddrSpace */,

135unsigned/* Load/Store element size bits */,

136char/* IsLoad; char b/c bool can't be a DenseMap key */

137 >;

138[[maybe_unused]]llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,

139const EqClassKey &K) {

140constauto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;

141returnOS << (IsLoad ?"load" :"store") <<" of " << *UnderlyingObject

142 <<" of element size " << ElementSize <<" bits in addrspace "

143 << AddrSpace;

144}

145

146// A Chain is a set of instructions such that:

147// - All instructions have the same equivalence class, so in particular all are

148// loads, or all are stores.

149// - We know the address accessed by the i'th chain elem relative to the

150// chain's leader instruction, which is the first instr of the chain in BB

151// order.

152//

153// Chains have two canonical orderings:

154// - BB order, sorted by Instr->comesBefore.

155// - Offset order, sorted by OffsetFromLeader.

156// This pass switches back and forth between these orders.

157structChainElem {

158Instruction *Inst;

159APInt OffsetFromLeader;

160 ChainElem(Instruction *Inst,APInt OffsetFromLeader)

161 : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}

162};

163usingChain =SmallVector<ChainElem, 1>;

164

165void sortChainInBBOrder(Chain &C) {

166sort(C, [](auto &A,auto &B) {returnA.Inst->comesBefore(B.Inst); });

167}

168

169void sortChainInOffsetOrder(Chain &C) {

170sort(C, [](constauto &A,constauto &B) {

171if (A.OffsetFromLeader !=B.OffsetFromLeader)

172returnA.OffsetFromLeader.slt(B.OffsetFromLeader);

173returnA.Inst->comesBefore(B.Inst);// stable tiebreaker

174 });

175}

176

177[[maybe_unused]]void dumpChain(ArrayRef<ChainElem>C) {

178for (constauto &E :C) {

179dbgs() <<" " << *E.Inst <<" (offset " << E.OffsetFromLeader <<")\n";

180 }

181}

182

183usingEquivalenceClassMap =

184MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

185

186// FIXME: Assuming stack alignment of 4 is always good enough

187constexprunsigned StackAdjustedAlignment = 4;

188

189Instruction *propagateMetadata(Instruction *I,const Chain &C) {

190SmallVector<Value *, 8> Values;

191for (const ChainElem &E :C)

192 Values.emplace_back(E.Inst);

193returnpropagateMetadata(I, Values);

194}

195

196bool isInvariantLoad(constInstruction *I) {

197constLoadInst *LI = dyn_cast<LoadInst>(I);

198return LI !=nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);

199}

200

201/// Reorders the instructions that I depends on (the instructions defining its

202/// operands), to ensure they dominate I.

203void reorder(Instruction *I) {

204SmallPtrSet<Instruction *, 16> InstructionsToMove;

205SmallVector<Instruction *, 16> Worklist;

206

207 Worklist.emplace_back(I);

208while (!Worklist.empty()) {

209Instruction *IW = Worklist.pop_back_val();

210int NumOperands = IW->getNumOperands();

211for (intIdx = 0;Idx < NumOperands;Idx++) {

212Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));

213if (!IM || IM->getOpcode() == Instruction::PHI)

214continue;

215

216// If IM is in another BB, no need to move it, because this pass only

217// vectorizes instructions within one BB.

218if (IM->getParent() !=I->getParent())

219continue;

220

221assert(IM !=I &&"Unexpected cycle while re-ordering instructions");

222

223if (!IM->comesBefore(I)) {

224 InstructionsToMove.insert(IM);

225 Worklist.emplace_back(IM);

226 }

227 }

228 }

229

230// All instructions to move should follow I. Start from I, not from begin().

231for (auto BBI =I->getIterator(), E =I->getParent()->end(); BBI != E;) {

232Instruction *IM = &*(BBI++);

233if (!InstructionsToMove.contains(IM))

234continue;

235 IM->moveBefore(I->getIterator());

236 }

237}

238

239classVectorizer {

240Function &F;

241AliasAnalysis &AA;

242AssumptionCache &AC;

243DominatorTree &DT;

244ScalarEvolution &SE;

245TargetTransformInfo &TTI;

246constDataLayout &DL;

247IRBuilder<> Builder;

248

249// We could erase instrs right after vectorizing them, but that can mess up

250// our BB iterators, and also can make the equivalence class keys point to

251// freed memory. This is fixable, but it's simpler just to wait until we're

252// done with the BB and erase all at once.

253SmallVector<Instruction *, 128> ToErase;

254

255public:

256 Vectorizer(Function &F,AliasAnalysis &AA,AssumptionCache &AC,

257DominatorTree &DT,ScalarEvolution &SE,TargetTransformInfo &TTI)

258 :F(F), AA(AA), AC(AC), DT(DT), SE(SE),TTI(TTI),

259DL(F.getDataLayout()), Builder(SE.getContext()) {}

260

261boolrun();

262

263private:

264staticconstunsigned MaxDepth = 3;

265

266 /// Runs the vectorizer on a "pseudo basic block", which is a range of

267 /// instructions [Begin, End) within one BB all of which have

268 /// isGuaranteedToTransferExecutionToSuccessor(I) == true.

269bool runOnPseudoBB(BasicBlock::iterator Begin,BasicBlock::iterator End);

270

271 /// Runs the vectorizer on one equivalence class, i.e. one set of loads/stores

272 /// in the same BB with the same value for getUnderlyingObject() etc.

273bool runOnEquivalenceClass(const EqClassKey &EqClassKey,

274ArrayRef<Instruction *> EqClass);

275

276 /// Runs the vectorizer on one chain, i.e. a subset of an equivalence class

277 /// where all instructions access a known, constant offset from the first

278 /// instruction.

279bool runOnChain(Chain &C);

280

281 /// Splits the chain into subchains of instructions which read/write a

282 /// contiguous block of memory. Discards any length-1 subchains (because

283 /// there's nothing to vectorize in there).

284 std::vector<Chain> splitChainByContiguity(Chain &C);

285

286 /// Splits the chain into subchains where it's safe to hoist loads up to the

287 /// beginning of the sub-chain and it's safe to sink loads up to the end of

288 /// the sub-chain. Discards any length-1 subchains.

289 std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

290

291 /// Splits the chain into subchains that make legal, aligned accesses.

292 /// Discards any length-1 subchains.

293 std::vector<Chain> splitChainByAlignment(Chain &C);

294

295 /// Converts the instrs in the chain into a single vectorized load or store.

296 /// Adds the old scalar loads/stores to ToErase.

297bool vectorizeChain(Chain &C);

298

299 /// Tries to compute the offset in bytes PtrB - PtrA.

300 std::optional<APInt> getConstantOffset(Value *PtrA,Value *PtrB,

301Instruction *ContextInst,

302unsignedDepth = 0);

303 std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA,Value *PtrB,

304Instruction *ContextInst,

305unsignedDepth);

306 std::optional<APInt> getConstantOffsetSelects(Value *PtrA,Value *PtrB,

307Instruction *ContextInst,

308unsignedDepth);

309

310 /// Gets the element type of the vector that the chain will load or store.

311 /// This is nontrivial because the chain may contain elements of different

312 /// types; e.g. it's legal to have a chain that contains both i32 and float.

313Type *getChainElemTy(const Chain &C);

314

315 /// Determines whether ChainElem can be moved up (if IsLoad) or down (if

316 /// !IsLoad) to ChainBegin -- i.e. there are no intervening may-alias

317 /// instructions.

318 ///

319 /// The map ChainElemOffsets must contain all of the elements in

320 /// [ChainBegin, ChainElem] and their offsets from some arbitrary base

321 /// address. It's ok if it contains additional entries.

322template <bool IsLoadChain>

323boolisSafeToMove(

324Instruction *ChainElem,Instruction *ChainBegin,

325constDenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);

326

327 /// Merges the equivalence classes if they have underlying objects that differ

328 /// by one level of indirection (i.e., one is a getelementptr and the other is

329 /// the base pointer in that getelementptr).

330void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)const;

331

332 /// Collects loads and stores grouped by "equivalence class", where:

333 /// - all elements in an eq class are a load or all are a store,

334 /// - they all load/store the same element size (it's OK to have e.g. i8 and

335 /// <4 x i8> in the same class, but not i32 and <4 x i8>), and

336 /// - they all have the same value for getUnderlyingObject().

337 EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,

338BasicBlock::iterator End);

339

340 /// Partitions Instrs into "chains" where every instruction has a known

341 /// constant offset from the first instr in the chain.

342 ///

343 /// Postcondition: For all i, ret[i][0].second == 0, because the first instr

344 /// in the chain is the leader, and an instr touches distance 0 from itself.

345 std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);

346};

347

348classLoadStoreVectorizerLegacyPass :publicFunctionPass {

349public:

350staticcharID;

351

352 LoadStoreVectorizerLegacyPass() :FunctionPass(ID) {

353initializeLoadStoreVectorizerLegacyPassPass(

354 *PassRegistry::getPassRegistry());

355 }

356

357boolrunOnFunction(Function &F)override;

358

359StringRef getPassName() const override{

360return"GPU Load and Store Vectorizer";

361 }

362

363voidgetAnalysisUsage(AnalysisUsage &AU) const override{

364 AU.addRequired<AAResultsWrapperPass>();

365 AU.addRequired<AssumptionCacheTracker>();

366 AU.addRequired<ScalarEvolutionWrapperPass>();

367 AU.addRequired<DominatorTreeWrapperPass>();

368 AU.addRequired<TargetTransformInfoWrapperPass>();

369 AU.setPreservesCFG();

370 }

371};

372

373}// end anonymous namespace

374

375char LoadStoreVectorizerLegacyPass::ID = 0;

376

377INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass,DEBUG_TYPE,

378"Vectorize load and Store instructions",false,false)

379INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)

380INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);

381INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)

382INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

383INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)

384INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)

385INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass,DEBUG_TYPE,

386 "Vectorizeload and storeinstructions",false,false)

387

388Pass *llvm::createLoadStoreVectorizerPass() {

389returnnew LoadStoreVectorizerLegacyPass();

390}

391

392bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {

393// Don't vectorize when the attribute NoImplicitFloat is used.

394if (skipFunction(F) ||F.hasFnAttribute(Attribute::NoImplicitFloat))

395returnfalse;

396

397AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();

398DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();

399ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();

400TargetTransformInfo &TTI =

401 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

402

403AssumptionCache &AC =

404 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

405

406return Vectorizer(F, AA, AC, DT, SE,TTI).run();

407}

408

409PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,

410FunctionAnalysisManager &AM) {

411// Don't vectorize when the attribute NoImplicitFloat is used.

412if (F.hasFnAttribute(Attribute::NoImplicitFloat))

413returnPreservedAnalyses::all();

414

415AliasAnalysis &AA = AM.getResult<AAManager>(F);

416DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);

417ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);

418TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);

419AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

420

421bool Changed = Vectorizer(F, AA, AC, DT, SE,TTI).run();

422PreservedAnalyses PA;

423 PA.preserveSet<CFGAnalyses>();

424return Changed ? PA :PreservedAnalyses::all();

425}

426

427bool Vectorizer::run() {

428bool Changed =false;

429// Break up the BB if there are any instrs which aren't guaranteed to transfer

430// execution to their successor.

431//

432// Consider, for example:

433//

434// def assert_arr_len(int n) { if (n < 2) exit(); }

435//

436// load arr[0]

437// call assert_array_len(arr.length)

438// load arr[1]

439//

440// Even though assert_arr_len does not read or write any memory, we can't

441// speculate the second load before the call. More info at

442// https://github.com/llvm/llvm-project/issues/52950.

443for (BasicBlock *BB :post_order(&F)) {

444// BB must at least have a terminator.

445assert(!BB->empty());

446

447SmallVector<BasicBlock::iterator, 8> Barriers;

448 Barriers.emplace_back(BB->begin());

449for (Instruction &I : *BB)

450if (!isGuaranteedToTransferExecutionToSuccessor(&I))

451 Barriers.emplace_back(I.getIterator());

452 Barriers.emplace_back(BB->end());

453

454for (auto It = Barriers.begin(),End = std::prev(Barriers.end()); It !=End;

455 ++It)

456 Changed |= runOnPseudoBB(*It, *std::next(It));

457

458for (Instruction *I : ToErase) {

459auto *PtrOperand =getLoadStorePointerOperand(I);

460if (I->use_empty())

461I->eraseFromParent();

462RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);

463 }

464 ToErase.clear();

465 }

466

467return Changed;

468}

469

470bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,

471BasicBlock::iterator End) {

472LLVM_DEBUG({

473dbgs() <<"LSV: Running on pseudo-BB [" << *Begin <<" ... ";

474if (End != Begin->getParent()->end())

475dbgs() << *End;

476else

477dbgs() <<"<BB end>";

478dbgs() <<")\n";

479 });

480

481bool Changed =false;

482for (constauto &[EqClassKey, EqClass] :

483 collectEquivalenceClasses(Begin,End))

484 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);

485

486return Changed;

487}

488

489bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,

490ArrayRef<Instruction *> EqClass) {

491bool Changed =false;

492

493LLVM_DEBUG({

494dbgs() <<"LSV: Running on equivalence class of size " << EqClass.size()

495 <<" keyed on " << EqClassKey <<":\n";

496for (Instruction *I : EqClass)

497dbgs() <<" " << *I <<"\n";

498 });

499

500 std::vector<Chain> Chains = gatherChains(EqClass);

501LLVM_DEBUG(dbgs() <<"LSV: Got " << Chains.size()

502 <<" nontrivial chains.\n";);

503for (Chain &C : Chains)

504 Changed |= runOnChain(C);

505return Changed;

506}

507

508bool Vectorizer::runOnChain(Chain &C) {

509LLVM_DEBUG({

510dbgs() <<"LSV: Running on chain with " <<C.size() <<" instructions:\n";

511 dumpChain(C);

512 });

513

514// Split up the chain into increasingly smaller chains, until we can finally

515// vectorize the chains.

516//

517// (Don't be scared by the depth of the loop nest here. These operations are

518// all at worst O(n lg n) in the number of instructions, and splitting chains

519// doesn't change the number of instrs. So the whole loop nest is O(n lg n).)

520bool Changed =false;

521for (auto &C : splitChainByMayAliasInstrs(C))

522for (auto &C : splitChainByContiguity(C))

523for (auto &C : splitChainByAlignment(C))

524 Changed |= vectorizeChain(C);

525return Changed;

526}

527

528std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {

529if (C.empty())

530return {};

531

532 sortChainInBBOrder(C);

533

534LLVM_DEBUG({

535dbgs() <<"LSV: splitChainByMayAliasInstrs considering chain:\n";

536 dumpChain(C);

537 });

538

539// We know that elements in the chain with nonverlapping offsets can't

540// alias, but AA may not be smart enough to figure this out. Use a

541// hashtable.

542DenseMap<Instruction *,APInt/*OffsetFromLeader*/> ChainOffsets;

543for (constauto &E :C)

544 ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

545

546// Loads get hoisted up to the first load in the chain. Stores get sunk

547// down to the last store in the chain. Our algorithm for loads is:

548//

549// - Take the first element of the chain. This is the start of a new chain.

550// - Take the next element of `Chain` and check for may-alias instructions

551// up to the start of NewChain. If no may-alias instrs, add it to

552// NewChain. Otherwise, start a new NewChain.

553//

554// For stores it's the same except in the reverse direction.

555//

556// We expect IsLoad to be an std::bool_constant.

557auto Impl = [&](auto IsLoad) {

558// MSVC is unhappy if IsLoad is a capture, so pass it as an arg.

559auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {

560ifconstexpr (IsLoad())

561return std::make_pair(C.begin(),C.end());

562else

563return std::make_pair(C.rbegin(),C.rend());

564 }(IsLoad);

565assert(ChainBegin != ChainEnd);

566

567 std::vector<Chain> Chains;

568SmallVector<ChainElem, 1> NewChain;

569 NewChain.emplace_back(*ChainBegin);

570for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {

571if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,

572 ChainOffsets)) {

573LLVM_DEBUG(dbgs() <<"LSV: No intervening may-alias instrs; can merge "

574 << *ChainIt->Inst <<" into " << *ChainBegin->Inst

575 <<"\n");

576 NewChain.emplace_back(*ChainIt);

577 }else {

578LLVM_DEBUG(

579dbgs() <<"LSV: Found intervening may-alias instrs; cannot merge "

580 << *ChainIt->Inst <<" into " << *ChainBegin->Inst <<"\n");

581if (NewChain.size() > 1) {

582LLVM_DEBUG({

583dbgs() <<"LSV: got nontrivial chain without aliasing instrs:\n";

584 dumpChain(NewChain);

585 });

586 Chains.emplace_back(std::move(NewChain));

587 }

588

589// Start a new chain.

590 NewChain =SmallVector<ChainElem, 1>({*ChainIt});

591 }

592 }

593if (NewChain.size() > 1) {

594LLVM_DEBUG({

595dbgs() <<"LSV: got nontrivial chain without aliasing instrs:\n";

596 dumpChain(NewChain);

597 });

598 Chains.emplace_back(std::move(NewChain));

599 }

600return Chains;

601 };

602

603if (isa<LoadInst>(C[0].Inst))

604return Impl(/*IsLoad=*/std::bool_constant<true>());

605

606assert(isa<StoreInst>(C[0].Inst));

607return Impl(/*IsLoad=*/std::bool_constant<false>());

608}

609

610std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {

611if (C.empty())

612return {};

613

614 sortChainInOffsetOrder(C);

615

616LLVM_DEBUG({

617dbgs() <<"LSV: splitChainByContiguity considering chain:\n";

618 dumpChain(C);

619 });

620

621 std::vector<Chain>Ret;

622Ret.push_back({C.front()});

623

624for (auto It = std::next(C.begin()),End =C.end(); It !=End; ++It) {

625// `prev` accesses offsets [PrevDistFromBase, PrevReadEnd).

626auto &CurChain =Ret.back();

627const ChainElem &Prev = CurChain.back();

628unsigned SzBits =DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst));

629assert(SzBits % 8 == 0 &&"Non-byte sizes should have been filtered out by "

630"collectEquivalenceClass");

631APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;

632

633// Add this instruction to the end of the current chain, or start a new one.

634bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;

635LLVM_DEBUG(dbgs() <<"LSV: Instructions are "

636 << (AreContiguous ?"" :"not ") <<"contiguous: "

637 << *Prev.Inst <<" (ends at offset " << PrevReadEnd

638 <<") -> " << *It->Inst <<" (starts at offset "

639 << It->OffsetFromLeader <<")\n");

640if (AreContiguous)

641 CurChain.push_back(*It);

642else

643Ret.push_back({*It});

644 }

645

646// Filter out length-1 chains, these are uninteresting.

647llvm::erase_if(Ret, [](constauto &Chain) {return Chain.size() <= 1; });

648returnRet;

649}

650

651Type *Vectorizer::getChainElemTy(const Chain &C) {

652assert(!C.empty());

653// The rules are:

654// - If there are any pointer types in the chain, use an integer type.

655// - Prefer an integer type if it appears in the chain.

656// - Otherwise, use the first type in the chain.

657//

658// The rule about pointer types is a simplification when we merge e.g. a load

659// of a ptr and a double. There's no direct conversion from a ptr to a

660// double; it requires a ptrtoint followed by a bitcast.

661//

662// It's unclear to me if the other rules have any practical effect, but we do

663// it to match this pass's previous behavior.

664if (any_of(C, [](const ChainElem &E) {

665returngetLoadStoreType(E.Inst)->getScalarType()->isPointerTy();

666 })) {

667returnType::getIntNTy(

668F.getContext(),

669DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));

670 }

671

672for (const ChainElem &E :C)

673if (Type *T =getLoadStoreType(E.Inst)->getScalarType();T->isIntegerTy())

674returnT;

675returngetLoadStoreType(C[0].Inst)->getScalarType();

676}

677

678std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {

679// We use a simple greedy algorithm.

680// - Given a chain of length N, find all prefixes that

681// (a) are not longer than the max register length, and

682// (b) are a power of 2.

683// - Starting from the longest prefix, try to create a vector of that length.

684// - If one of them works, great. Repeat the algorithm on any remaining

685// elements in the chain.

686// - If none of them work, discard the first element and repeat on a chain

687// of length N-1.

688if (C.empty())

689return {};

690

691 sortChainInOffsetOrder(C);

692

693LLVM_DEBUG({

694dbgs() <<"LSV: splitChainByAlignment considering chain:\n";

695 dumpChain(C);

696 });

697

698bool IsLoadChain = isa<LoadInst>(C[0].Inst);

699auto GetVectorFactor = [&](unsigned VF,unsigned LoadStoreSize,

700unsigned ChainSizeBytes,VectorType *VecTy) {

701return IsLoadChain ?TTI.getLoadVectorFactor(VF, LoadStoreSize,

702 ChainSizeBytes, VecTy)

703 :TTI.getStoreVectorFactor(VF, LoadStoreSize,

704 ChainSizeBytes, VecTy);

705 };

706

707#ifndef NDEBUG

708for (constauto &E :C) {

709Type *Ty =getLoadStoreType(E.Inst)->getScalarType();

710assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&

711"Should have filtered out non-power-of-two elements in "

712"collectEquivalenceClasses.");

713 }

714#endif

715

716unsigned AS =getLoadStoreAddressSpace(C[0].Inst);

717unsigned VecRegBytes =TTI.getLoadStoreVecRegBitWidth(AS) / 8;

718

719 std::vector<Chain>Ret;

720for (unsigned CBegin = 0; CBegin <C.size(); ++CBegin) {

721// Find candidate chains of size not greater than the largest vector reg.

722// These chains are over the closed interval [CBegin, CEnd].

723SmallVector<std::pair<unsigned/*CEnd*/,unsigned/*SizeBytes*/>, 8>

724 CandidateChains;

725for (unsigned CEnd = CBegin + 1,Size =C.size(); CEnd <Size; ++CEnd) {

726APInt Sz =C[CEnd].OffsetFromLeader +

727DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst)) -

728C[CBegin].OffsetFromLeader;

729if (Sz.sgt(VecRegBytes))

730break;

731 CandidateChains.emplace_back(CEnd,

732static_cast<unsigned>(Sz.getLimitedValue()));

733 }

734

735// Consider the longest chain first.

736for (auto It = CandidateChains.rbegin(),End = CandidateChains.rend();

737 It !=End; ++It) {

738auto [CEnd, SizeBytes] = *It;

739LLVM_DEBUG(

740dbgs() <<"LSV: splitChainByAlignment considering candidate chain ["

741 << *C[CBegin].Inst <<" ... " << *C[CEnd].Inst <<"]\n");

742

743Type *VecElemTy = getChainElemTy(C);

744// Note, VecElemTy is a power of 2, but might be less than one byte. For

745// example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case

746// VecElemTy would be i4.

747unsigned VecElemBits =DL.getTypeSizeInBits(VecElemTy);

748

749// SizeBytes and VecElemBits are powers of 2, so they divide evenly.

750assert((8 * SizeBytes) % VecElemBits == 0);

751unsigned NumVecElems = 8 * SizeBytes / VecElemBits;

752FixedVectorType *VecTy =FixedVectorType::get(VecElemTy, NumVecElems);

753unsigned VF = 8 * VecRegBytes / VecElemBits;

754

755// Check that TTI is happy with this vectorization factor.

756unsigned TargetVF = GetVectorFactor(VF, VecElemBits,

757 VecElemBits * NumVecElems / 8, VecTy);

758if (TargetVF != VF && TargetVF < NumVecElems) {

759LLVM_DEBUG(

760dbgs() <<"LSV: splitChainByAlignment discarding candidate chain "

761"because TargetVF="

762 << TargetVF <<" != VF=" << VF

763 <<" and TargetVF < NumVecElems=" << NumVecElems <<"\n");

764continue;

765 }

766

767// Is a load/store with this alignment allowed by TTI and at least as fast

768// as an unvectorized load/store?

769//

770// TTI and F are passed as explicit captures to WAR an MSVC misparse (??).

771auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI =TTI,

772 &F =F](Align Alignment) {

773if (Alignment.value() % SizeBytes == 0)

774returntrue;

775unsigned VectorizedSpeed = 0;

776bool AllowsMisaligned =TTI.allowsMisalignedMemoryAccesses(

777F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);

778if (!AllowsMisaligned) {

779LLVM_DEBUG(dbgs()

780 <<"LSV: Access of " << SizeBytes <<"B in addrspace "

781 << AS <<" with alignment " << Alignment.value()

782 <<" is misaligned, and therefore can't be vectorized.\n");

783returnfalse;

784 }

785

786unsigned ElementwiseSpeed = 0;

787 (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,

788 Alignment, &ElementwiseSpeed);

789if (VectorizedSpeed < ElementwiseSpeed) {

790LLVM_DEBUG(dbgs()

791 <<"LSV: Access of " << SizeBytes <<"B in addrspace "

792 << AS <<" with alignment " << Alignment.value()

793 <<" has relative speed " << VectorizedSpeed

794 <<", which is lower than the elementwise speed of "

795 << ElementwiseSpeed

796 <<". Therefore this access won't be vectorized.\n");

797returnfalse;

798 }

799returntrue;

800 };

801

802// If we're loading/storing from an alloca, align it if possible.

803//

804// FIXME: We eagerly upgrade the alignment, regardless of whether TTI

805// tells us this is beneficial. This feels a bit odd, but it matches

806// existing tests. This isn't *so* bad, because at most we align to 4

807// bytes (current value of StackAdjustedAlignment).

808//

809// FIXME: We will upgrade the alignment of the alloca even if it turns out

810// we can't vectorize for some other reason.

811Value *PtrOperand =getLoadStorePointerOperand(C[CBegin].Inst);

812bool IsAllocaAccess = AS ==DL.getAllocaAddrSpace() &&

813 isa<AllocaInst>(PtrOperand->stripPointerCasts());

814Align Alignment =getLoadStoreAlignment(C[CBegin].Inst);

815Align PrefAlign =Align(StackAdjustedAlignment);

816if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&

817 IsAllowedAndFast(PrefAlign)) {

818Align NewAlign =getOrEnforceKnownAlignment(

819 PtrOperand, PrefAlign,DL,C[CBegin].Inst,nullptr, &DT);

820if (NewAlign >= Alignment) {

821LLVM_DEBUG(dbgs()

822 <<"LSV: splitByChain upgrading alloca alignment from "

823 << Alignment.value() <<" to " << NewAlign.value()

824 <<"\n");

825 Alignment = NewAlign;

826 }

827 }

828

829if (!IsAllowedAndFast(Alignment)) {

830LLVM_DEBUG(

831dbgs() <<"LSV: splitChainByAlignment discarding candidate chain "

832"because its alignment is not AllowedAndFast: "

833 << Alignment.value() <<"\n");

834continue;

835 }

836

837if ((IsLoadChain &&

838 !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||

839 (!IsLoadChain &&

840 !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {

841LLVM_DEBUG(

842dbgs() <<"LSV: splitChainByAlignment discarding candidate chain "

843"because !isLegalToVectorizeLoad/StoreChain.");

844continue;

845 }

846

847// Hooray, we can vectorize this chain!

848 Chain &NewChain =Ret.emplace_back();

849for (unsignedI = CBegin;I <= CEnd; ++I)

850 NewChain.emplace_back(C[I]);

851 CBegin = CEnd;// Skip over the instructions we've added to the chain.

852break;

853 }

854 }

855returnRet;

856}

857

858bool Vectorizer::vectorizeChain(Chain &C) {

859if (C.size() < 2)

860returnfalse;

861

862 sortChainInOffsetOrder(C);

863

864LLVM_DEBUG({

865dbgs() <<"LSV: Vectorizing chain of " <<C.size() <<" instructions:\n";

866 dumpChain(C);

867 });

868

869Type *VecElemTy = getChainElemTy(C);

870bool IsLoadChain = isa<LoadInst>(C[0].Inst);

871unsigned AS =getLoadStoreAddressSpace(C[0].Inst);

872unsigned ChainBytes = std::accumulate(

873C.begin(),C.end(), 0u, [&](unsigned Bytes,const ChainElem &E) {

874 return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));

875 });

876assert(ChainBytes %DL.getTypeStoreSize(VecElemTy) == 0);

877// VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller

878// than 1 byte (e.g. VecTy == <32 x i1>).

879Type *VecTy =FixedVectorType::get(

880 VecElemTy, 8 * ChainBytes /DL.getTypeSizeInBits(VecElemTy));

881

882Align Alignment =getLoadStoreAlignment(C[0].Inst);

883// If this is a load/store of an alloca, we might have upgraded the alloca's

884// alignment earlier. Get the new alignment.

885if (AS ==DL.getAllocaAddrSpace()) {

886 Alignment = std::max(

887 Alignment,

888getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),

889MaybeAlign(),DL,C[0].Inst,nullptr, &DT));

890 }

891

892// All elements of the chain must have the same scalar-type size.

893#ifndef NDEBUG

894for (const ChainElem &E :C)

895assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==

896DL.getTypeStoreSize(VecElemTy));

897#endif

898

899Instruction *VecInst;

900if (IsLoadChain) {

901// Loads get hoisted to the location of the first load in the chain. We may

902// also need to hoist the (transitive) operands of the loads.

903 Builder.SetInsertPoint(

904llvm::min_element(C, [](constauto &A,constauto &B) {

905returnA.Inst->comesBefore(B.Inst);

906 })->Inst);

907

908// Chain is in offset order, so C[0] is the instr with the lowest offset,

909// i.e. the root of the vector.

910 VecInst = Builder.CreateAlignedLoad(VecTy,

911getLoadStorePointerOperand(C[0].Inst),

912 Alignment);

913

914unsigned VecIdx = 0;

915for (const ChainElem &E :C) {

916Instruction *I = E.Inst;

917Value *V;

918Type *T =getLoadStoreType(I);

919if (auto *VT = dyn_cast<FixedVectorType>(T)) {

920autoMask = llvm::to_vector<8>(

921 llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));

922V = Builder.CreateShuffleVector(VecInst, Mask,I->getName());

923 VecIdx += VT->getNumElements();

924 }else {

925V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),

926I->getName());

927 ++VecIdx;

928 }

929if (V->getType() !=I->getType())

930V = Builder.CreateBitOrPointerCast(V,I->getType());

931I->replaceAllUsesWith(V);

932 }

933

934// Finally, we need to reorder the instrs in the BB so that the (transitive)

935// operands of VecInst appear before it. To see why, suppose we have

936// vectorized the following code:

937//

938// ptr1 = gep a, 1

939// load1 = load i32 ptr1

940// ptr0 = gep a, 0

941// load0 = load i32 ptr0

942//

943// We will put the vectorized load at the location of the earliest load in

944// the BB, i.e. load1. We get:

945//

946// ptr1 = gep a, 1

947// loadv = load <2 x i32> ptr0

948// load0 = extractelement loadv, 0

949// load1 = extractelement loadv, 1

950// ptr0 = gep a, 0

951//

952// Notice that loadv uses ptr0, which is defined *after* it!

953 reorder(VecInst);

954 }else {

955// Stores get sunk to the location of the last store in the chain.

956 Builder.SetInsertPoint(llvm::max_element(C, [](auto &A,auto &B) {

957returnA.Inst->comesBefore(B.Inst);

958 })->Inst);

959

960// Build the vector to store.

961Value *Vec =PoisonValue::get(VecTy);

962unsigned VecIdx = 0;

963auto InsertElem = [&](Value *V) {

964if (V->getType() != VecElemTy)

965 V = Builder.CreateBitOrPointerCast(V, VecElemTy);

966 Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));

967 };

968for (const ChainElem &E :C) {

969auto *I = cast<StoreInst>(E.Inst);

970if (FixedVectorType *VT =

971 dyn_cast<FixedVectorType>(getLoadStoreType(I))) {

972for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {

973 InsertElem(Builder.CreateExtractElement(I->getValueOperand(),

974 Builder.getInt32(J)));

975 }

976 }else {

977 InsertElem(I->getValueOperand());

978 }

979 }

980

981// Chain is in offset order, so C[0] is the instr with the lowest offset,

982// i.e. the root of the vector.

983 VecInst = Builder.CreateAlignedStore(

984 Vec,

985getLoadStorePointerOperand(C[0].Inst),

986 Alignment);

987 }

988

989propagateMetadata(VecInst,C);

990

991for (const ChainElem &E :C)

992 ToErase.emplace_back(E.Inst);

993

994 ++NumVectorInstructions;

995 NumScalarsVectorized +=C.size();

996returntrue;

997}

998

999template <bool IsLoadChain>

1000bool Vectorizer::isSafeToMove(

1001Instruction *ChainElem,Instruction *ChainBegin,

1002constDenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets) {

1003LLVM_DEBUG(dbgs() <<"LSV: isSafeToMove(" << *ChainElem <<" -> "

1004 << *ChainBegin <<")\n");

1005

1006assert(isa<LoadInst>(ChainElem) == IsLoadChain);

1007if (ChainElem == ChainBegin)

1008returntrue;

1009

1010// Invariant loads can always be reordered; by definition they are not

1011// clobbered by stores.

1012if (isInvariantLoad(ChainElem))

1013returntrue;

1014

1015auto BBIt = std::next([&] {

1016ifconstexpr (IsLoadChain)

1017returnBasicBlock::reverse_iterator(ChainElem);

1018else

1019returnBasicBlock::iterator(ChainElem);

1020 }());

1021auto BBItEnd = std::next([&] {

1022ifconstexpr (IsLoadChain)

1023returnBasicBlock::reverse_iterator(ChainBegin);

1024else

1025returnBasicBlock::iterator(ChainBegin);

1026 }());

1027

1028constAPInt &ChainElemOffset = ChainOffsets.at(ChainElem);

1029constunsigned ChainElemSize =

1030DL.getTypeStoreSize(getLoadStoreType(ChainElem));

1031

1032for (; BBIt != BBItEnd; ++BBIt) {

1033Instruction *I = &*BBIt;

1034

1035if (!I->mayReadOrWriteMemory())

1036continue;

1037

1038// Loads can be reordered with other loads.

1039if (IsLoadChain && isa<LoadInst>(I))

1040continue;

1041

1042// Stores can be sunk below invariant loads.

1043if (!IsLoadChain && isInvariantLoad(I))

1044continue;

1045

1046// If I is in the chain, we can tell whether it aliases ChainIt by checking

1047// what offset ChainIt accesses. This may be better than AA is able to do.

1048//

1049// We should really only have duplicate offsets for stores (the duplicate

1050// loads should be CSE'ed), but in case we have a duplicate load, we'll

1051// split the chain so we don't have to handle this case specially.

1052if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {

1053// I and ChainElem overlap if:

1054// - I and ChainElem have the same offset, OR

1055// - I's offset is less than ChainElem's, but I touches past the

1056// beginning of ChainElem, OR

1057// - ChainElem's offset is less than I's, but ChainElem touches past the

1058// beginning of I.

1059constAPInt &IOffset = OffsetIt->second;

1060unsigned IElemSize =DL.getTypeStoreSize(getLoadStoreType(I));

1061if (IOffset == ChainElemOffset ||

1062 (IOffset.sle(ChainElemOffset) &&

1063 (IOffset + IElemSize).sgt(ChainElemOffset)) ||

1064 (ChainElemOffset.sle(IOffset) &&

1065 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {

1066LLVM_DEBUG({

1067// Double check that AA also sees this alias. If not, we probably

1068// have a bug.

1069ModRefInfo MR = AA.getModRefInfo(I,MemoryLocation::get(ChainElem));

1070assert(IsLoadChain ?isModSet(MR) :isModOrRefSet(MR));

1071dbgs() <<"LSV: Found alias in chain: " << *I <<"\n";

1072 });

1073returnfalse;// We found an aliasing instruction; bail.

1074 }

1075

1076continue;// We're confident there's no alias.

1077 }

1078

1079LLVM_DEBUG(dbgs() <<"LSV: Querying AA for " << *I <<"\n");

1080ModRefInfo MR = AA.getModRefInfo(I,MemoryLocation::get(ChainElem));

1081if (IsLoadChain ?isModSet(MR) :isModOrRefSet(MR)) {

1082LLVM_DEBUG(dbgs() <<"LSV: Found alias in chain:\n"

1083 <<" Aliasing instruction:\n"

1084 <<" " << *I <<'\n'

1085 <<" Aliased instruction and pointer:\n"

1086 <<" " << *ChainElem <<'\n'

1087 <<" " << *getLoadStorePointerOperand(ChainElem)

1088 <<'\n');

1089

1090returnfalse;

1091 }

1092 }

1093returntrue;

1094}

1095

1096staticboolcheckNoWrapFlags(Instruction *I,boolSigned) {

1097BinaryOperator *BinOpI = cast<BinaryOperator>(I);

1098return (Signed && BinOpI->hasNoSignedWrap()) ||

1099 (!Signed && BinOpI->hasNoUnsignedWrap());

1100}

1101

1102staticboolcheckIfSafeAddSequence(constAPInt &IdxDiff,Instruction *AddOpA,

1103unsigned MatchingOpIdxA,Instruction *AddOpB,

1104unsigned MatchingOpIdxB,boolSigned) {

1105LLVM_DEBUG(dbgs() <<"LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff

1106 <<", AddOpA=" << *AddOpA <<", MatchingOpIdxA="

1107 << MatchingOpIdxA <<", AddOpB=" << *AddOpB

1108 <<", MatchingOpIdxB=" << MatchingOpIdxB

1109 <<", Signed=" <<Signed <<"\n");

1110// If both OpA and OpB are adds with NSW/NUW and with one of the operands

1111// being the same, we can guarantee that the transformation is safe if we can

1112// prove that OpA won't overflow when Ret added to the other operand of OpA.

1113// For example:

1114// %tmp7 = add nsw i32 %tmp2, %v0

1115// %tmp8 = sext i32 %tmp7 to i64

1116// ...

1117// %tmp11 = add nsw i32 %v0, 1

1118// %tmp12 = add nsw i32 %tmp2, %tmp11

1119// %tmp13 = sext i32 %tmp12 to i64

1120//

1121// Both %tmp7 and %tmp12 have the nsw flag and the first operand is %tmp2.

1122// It's guaranteed that adding 1 to %tmp7 won't overflow because %tmp11 adds

1123// 1 to %v0 and both %tmp11 and %tmp12 have the nsw flag.

1124assert(AddOpA->getOpcode() == Instruction::Add &&

1125 AddOpB->getOpcode() == Instruction::Add &&

1126checkNoWrapFlags(AddOpA,Signed) &&checkNoWrapFlags(AddOpB,Signed));

1127if (AddOpA->getOperand(MatchingOpIdxA) ==

1128 AddOpB->getOperand(MatchingOpIdxB)) {

1129Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);

1130Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);

1131Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);

1132Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);

1133// Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.

1134if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&

1135checkNoWrapFlags(OtherInstrB,Signed) &&

1136 isa<ConstantInt>(OtherInstrB->getOperand(1))) {

1137 int64_t CstVal =

1138 cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();

1139if (OtherInstrB->getOperand(0) == OtherOperandA &&

1140 IdxDiff.getSExtValue() == CstVal)

1141returntrue;

1142 }

1143// Match `x +nsw/nuw (y +nsw/nuw -Idx)` and `x +nsw/nuw (y +nsw/nuw x)`.

1144if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&

1145checkNoWrapFlags(OtherInstrA,Signed) &&

1146 isa<ConstantInt>(OtherInstrA->getOperand(1))) {

1147 int64_t CstVal =

1148 cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();

1149if (OtherInstrA->getOperand(0) == OtherOperandB &&

1150 IdxDiff.getSExtValue() == -CstVal)

1151returntrue;

1152 }

1153// Match `x +nsw/nuw (y +nsw/nuw c)` and

1154// `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.

1155if (OtherInstrA && OtherInstrB &&

1156 OtherInstrA->getOpcode() == Instruction::Add &&

1157 OtherInstrB->getOpcode() == Instruction::Add &&

1158checkNoWrapFlags(OtherInstrA,Signed) &&

1159checkNoWrapFlags(OtherInstrB,Signed) &&

1160 isa<ConstantInt>(OtherInstrA->getOperand(1)) &&

1161 isa<ConstantInt>(OtherInstrB->getOperand(1))) {

1162 int64_t CstValA =

1163 cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();

1164 int64_t CstValB =

1165 cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();

1166if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&

1167 IdxDiff.getSExtValue() == (CstValB - CstValA))

1168returntrue;

1169 }

1170 }

1171returnfalse;

1172}

1173

1174std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(

1175Value *PtrA,Value *PtrB,Instruction *ContextInst,unsignedDepth) {

1176LLVM_DEBUG(dbgs() <<"LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA

1177 <<" PtrB=" << *PtrB <<" ContextInst=" << *ContextInst

1178 <<" Depth=" <<Depth <<"\n");

1179auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);

1180auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);

1181if (!GEPA || !GEPB)

1182return getConstantOffsetSelects(PtrA, PtrB, ContextInst,Depth);

1183

1184// Look through GEPs after checking they're the same except for the last

1185// index.

1186if (GEPA->getNumOperands() != GEPB->getNumOperands() ||

1187 GEPA->getPointerOperand() != GEPB->getPointerOperand())

1188return std::nullopt;

1189gep_type_iterator GTIA =gep_type_begin(GEPA);

1190gep_type_iterator GTIB =gep_type_begin(GEPB);

1191for (unsignedI = 0, E = GEPA->getNumIndices() - 1;I < E; ++I) {

1192if (GTIA.getOperand() != GTIB.getOperand())

1193return std::nullopt;

1194 ++GTIA;

1195 ++GTIB;

1196 }

1197

1198Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());

1199Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());

1200if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||

1201 OpA->getType() != OpB->getType())

1202return std::nullopt;

1203

1204uint64_t Stride = GTIA.getSequentialElementStride(DL);

1205

1206// Only look through a ZExt/SExt.

1207if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))

1208return std::nullopt;

1209

1210boolSigned = isa<SExtInst>(OpA);

1211

1212// At this point A could be a function parameter, i.e. not an instruction

1213Value *ValA = OpA->getOperand(0);

1214 OpB = dyn_cast<Instruction>(OpB->getOperand(0));

1215if (!OpB || ValA->getType() != OpB->getType())

1216return std::nullopt;

1217

1218constSCEV *OffsetSCEVA = SE.getSCEV(ValA);

1219constSCEV *OffsetSCEVB = SE.getSCEV(OpB);

1220constSCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);

1221if (IdxDiffSCEV == SE.getCouldNotCompute())

1222return std::nullopt;

1223

1224ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);

1225if (!IdxDiffRange.isSingleElement())

1226return std::nullopt;

1227APInt IdxDiff = *IdxDiffRange.getSingleElement();

1228

1229LLVM_DEBUG(dbgs() <<"LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff

1230 <<"\n");

1231

1232// Now we need to prove that adding IdxDiff to ValA won't overflow.

1233bool Safe =false;

1234

1235// First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to

1236// ValA, we're okay.

1237if (OpB->getOpcode() == Instruction::Add &&

1238 isa<ConstantInt>(OpB->getOperand(1)) &&

1239 IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&

1240checkNoWrapFlags(OpB,Signed))

1241 Safe =true;

1242

1243// Second attempt: check if we have eligible add NSW/NUW instruction

1244// sequences.

1245 OpA = dyn_cast<Instruction>(ValA);

1246if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&

1247 OpB->getOpcode() == Instruction::Add &&checkNoWrapFlags(OpA,Signed) &&

1248checkNoWrapFlags(OpB,Signed)) {

1249// In the checks below a matching operand in OpA and OpB is an operand which

1250// is the same in those two instructions. Below we account for possible

1251// orders of the operands of these add instructions.

1252for (unsigned MatchingOpIdxA : {0, 1})

1253for (unsigned MatchingOpIdxB : {0, 1})

1254if (!Safe)

1255 Safe =checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,

1256 MatchingOpIdxB,Signed);

1257 }

1258

1259unsignedBitWidth = ValA->getType()->getScalarSizeInBits();

1260

1261// Third attempt:

1262//

1263// Assuming IdxDiff is positive: If all set bits of IdxDiff or any higher

1264// order bit other than the sign bit are known to be zero in ValA, we can add

1265// Diff to it while guaranteeing no overflow of any sort.

1266//

1267// If IdxDiff is negative, do the same, but swap ValA and ValB.

1268if (!Safe) {

1269// When computing known bits, use the GEPs as context instructions, since

1270// they likely are in the same BB as the load/store.

1271KnownBits Known(BitWidth);

1272computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known,DL, 0, &AC,

1273 ContextInst, &DT);

1274APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());

1275if (Signed)

1276 BitsAllowedToBeSet.clearBit(BitWidth - 1);

1277if (BitsAllowedToBeSet.ult(IdxDiff.abs()))

1278return std::nullopt;

1279 Safe =true;

1280 }

1281

1282if (Safe)

1283return IdxDiff * Stride;

1284return std::nullopt;

1285}

1286

1287std::optional<APInt> Vectorizer::getConstantOffsetSelects(

1288Value *PtrA,Value *PtrB,Instruction *ContextInst,unsignedDepth) {

1289if (Depth++ == MaxDepth)

1290return std::nullopt;

1291

1292if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {

1293if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {

1294if (SelectA->getCondition() != SelectB->getCondition())

1295return std::nullopt;

1296LLVM_DEBUG(dbgs() <<"LSV: getConstantOffsetSelects, PtrA=" << *PtrA

1297 <<", PtrB=" << *PtrB <<", ContextInst="

1298 << *ContextInst <<", Depth=" <<Depth <<"\n");

1299 std::optional<APInt> TrueDiff = getConstantOffset(

1300 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst,Depth);

1301if (!TrueDiff)

1302return std::nullopt;

1303 std::optional<APInt> FalseDiff =

1304 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),

1305 ContextInst,Depth);

1306if (TrueDiff == FalseDiff)

1307return TrueDiff;

1308 }

1309 }

1310return std::nullopt;

1311}

1312

1313void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const{

1314if (EQClasses.size() < 2)// There is nothing to merge.

1315return;

1316

1317// The reduced key has all elements of the ECClassKey except the underlying

1318// object. Check that EqClassKey has 4 elements and define the reduced key.

1319static_assert(std::tuple_size_v<EqClassKey> == 4,

1320"EqClassKey has changed - EqClassReducedKey needs changes too");

1321usingEqClassReducedKey =

1322 std::tuple<std::tuple_element_t<1, EqClassKey>/* AddrSpace */,

1323 std::tuple_element_t<2, EqClassKey>/* Element size */,

1324 std::tuple_element_t<3, EqClassKey>/* IsLoad; */>;

1325usingECReducedKeyToUnderlyingObjectMap =

1326MapVector<EqClassReducedKey,

1327SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;

1328

1329// Form a map from the reduced key (without the underlying object) to the

1330// underlying objects: 1 reduced key to many underlying objects, to form

1331// groups of potentially merge-able equivalence classes.

1332 ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;

1333bool FoundPotentiallyOptimizableEC =false;

1334for (constauto &EC : EQClasses) {

1335constauto &Key =EC.first;

1336 EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),

1337 std::get<3>(Key)};

1338 RedKeyToUOMap[RedKey].insert(std::get<0>(Key));

1339if (RedKeyToUOMap[RedKey].size() > 1)

1340 FoundPotentiallyOptimizableEC =true;

1341 }

1342if (!FoundPotentiallyOptimizableEC)

1343return;

1344

1345LLVM_DEBUG({

1346dbgs() <<"LSV: mergeEquivalenceClasses: before merging:\n";

1347for (constauto &EC : EQClasses) {

1348dbgs() <<" Key: {" <<EC.first <<"}\n";

1349for (constauto &Inst :EC.second)

1350dbgs() <<" Inst: " << *Inst <<'\n';

1351 }

1352 });

1353LLVM_DEBUG({

1354dbgs() <<"LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";

1355for (constauto &RedKeyToUO : RedKeyToUOMap) {

1356dbgs() <<" Reduced key: {" << std::get<0>(RedKeyToUO.first) <<", "

1357 << std::get<1>(RedKeyToUO.first) <<", "

1358 <<static_cast<int>(std::get<2>(RedKeyToUO.first)) <<"} --> "

1359 << RedKeyToUO.second.size() <<" underlying objects:\n";

1360for (auto UObject : RedKeyToUO.second)

1361dbgs() <<" " << *UObject <<'\n';

1362 }

1363 });

1364

1365usingUObjectToUObjectMap =DenseMap<const Value *, const Value *>;

1366

1367// Compute the ultimate targets for a set of underlying objects.

1368auto GetUltimateTargets =

1369 [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {

1370 UObjectToUObjectMap IndirectionMap;

1371for (constauto *UObject : UObjects) {

1372constunsigned MaxLookupDepth = 1;// look for 1-level indirections only

1373constauto *UltimateTarget =getUnderlyingObject(UObject, MaxLookupDepth);

1374if (UltimateTarget != UObject)

1375 IndirectionMap[UObject] = UltimateTarget;

1376 }

1377 UObjectToUObjectMap UltimateTargetsMap;

1378for (constauto *UObject : UObjects) {

1379autoTarget = UObject;

1380auto It = IndirectionMap.find(Target);

1381for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))

1382Target = It->second;

1383 UltimateTargetsMap[UObject] =Target;

1384 }

1385return UltimateTargetsMap;

1386 };

1387

1388// For each item in RedKeyToUOMap, if it has more than one underlying object,

1389// try to merge the equivalence classes.

1390for (auto &[RedKey, UObjects] : RedKeyToUOMap) {

1391if (UObjects.size() < 2)

1392continue;

1393auto UTMap = GetUltimateTargets(UObjects);

1394for (constauto &[UObject, UltimateTarget] : UTMap) {

1395if (UObject == UltimateTarget)

1396continue;

1397

1398 EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),

1399 std::get<2>(RedKey)};

1400 EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),

1401 std::get<2>(RedKey)};

1402// The entry for KeyFrom is guarantted to exist, unlike KeyTo. Thus,

1403// request the reference to the instructions vector for KeyTo first.

1404constauto &VecTo = EQClasses[KeyTo];

1405constauto &VecFrom = EQClasses[KeyFrom];

1406SmallVector<Instruction *, 8> MergedVec;

1407 std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),

1408 std::back_inserter(MergedVec),

1409 [](Instruction *A,Instruction *B) {

1410 return A && B && A->comesBefore(B);

1411 });

1412 EQClasses[KeyTo] = std::move(MergedVec);

1413 EQClasses.erase(KeyFrom);

1414 }

1415 }

1416LLVM_DEBUG({

1417dbgs() <<"LSV: mergeEquivalenceClasses: after merging:\n";

1418for (constauto &EC : EQClasses) {

1419dbgs() <<" Key: {" <<EC.first <<"}\n";

1420for (constauto &Inst :EC.second)

1421dbgs() <<" Inst: " << *Inst <<'\n';

1422 }

1423 });

1424}

1425

1426EquivalenceClassMap

1427Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,

1428BasicBlock::iterator End) {

1429 EquivalenceClassMapRet;

1430

1431auto GetUnderlyingObject = [](constValue *Ptr) ->constValue * {

1432constValue *ObjPtr =llvm::getUnderlyingObject(Ptr);

1433if (constauto *Sel = dyn_cast<SelectInst>(ObjPtr)) {

1434// The select's themselves are distinct instructions even if they share

1435// the same condition and evaluate to consecutive pointers for true and

1436// false values of the condition. Therefore using the select's themselves

1437// for grouping instructions would put consecutive accesses into different

1438// lists and they won't be even checked for being consecutive, and won't

1439// be vectorized.

1440return Sel->getCondition();

1441 }

1442return ObjPtr;

1443 };

1444

1445for (Instruction &I :make_range(Begin,End)) {

1446auto *LI = dyn_cast<LoadInst>(&I);

1447auto *SI = dyn_cast<StoreInst>(&I);

1448if (!LI && !SI)

1449continue;

1450

1451if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))

1452continue;

1453

1454if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||

1455 (SI && !TTI.isLegalToVectorizeStore(SI)))

1456continue;

1457

1458Type *Ty =getLoadStoreType(&I);

1459if (!VectorType::isValidElementType(Ty->getScalarType()))

1460continue;

1461

1462// Skip weird non-byte sizes. They probably aren't worth the effort of

1463// handling correctly.

1464unsigned TySize =DL.getTypeSizeInBits(Ty);

1465if ((TySize % 8) != 0)

1466continue;

1467

1468// Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain

1469// functions are currently using an integer type for the vectorized

1470// load/store, and does not support casting between the integer type and a

1471// vector of pointers (e.g. i64 to <2 x i16*>)

1472if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())

1473continue;

1474

1475Value *Ptr =getLoadStorePointerOperand(&I);

1476unsigned AS =Ptr->getType()->getPointerAddressSpace();

1477unsigned VecRegSize =TTI.getLoadStoreVecRegBitWidth(AS);

1478

1479unsigned VF = VecRegSize / TySize;

1480VectorType *VecTy = dyn_cast<VectorType>(Ty);

1481

1482// Only handle power-of-two sized elements.

1483if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||

1484 (VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))

1485continue;

1486

1487// No point in looking at these if they're too big to vectorize.

1488if (TySize > VecRegSize / 2 ||

1489 (VecTy &&TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))

1490continue;

1491

1492Ret[{GetUnderlyingObject(Ptr), AS,

1493DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),

1494/*IsLoad=*/LI !=nullptr}]

1495 .emplace_back(&I);

1496 }

1497

1498 mergeEquivalenceClasses(Ret);

1499returnRet;

1500}

1501

1502std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {

1503if (Instrs.empty())

1504return {};

1505

1506unsigned AS =getLoadStoreAddressSpace(Instrs[0]);

1507unsigned ASPtrBits =DL.getIndexSizeInBits(AS);

1508

1509#ifndef NDEBUG

1510// Check that Instrs is in BB order and all have the same addr space.

1511for (size_tI = 1;I < Instrs.size(); ++I) {

1512assert(Instrs[I - 1]->comesBefore(Instrs[I]));

1513assert(getLoadStoreAddressSpace(Instrs[I]) == AS);

1514 }

1515#endif

1516

1517// Machinery to build an MRU-hashtable of Chains.

1518//

1519// (Ideally this could be done with MapVector, but as currently implemented,

1520// moving an element to the front of a MapVector is O(n).)

1521structInstrListElem :ilist_node<InstrListElem>,

1522 std::pair<Instruction *, Chain> {

1523explicit InstrListElem(Instruction *I)

1524 :std::pair<Instruction *, Chain>(I, {}) {}

1525 };

1526structInstrListElemDenseMapInfo {

1527usingPtrInfo =DenseMapInfo<InstrListElem *>;

1528usingIInfo =DenseMapInfo<Instruction *>;

1529static InstrListElem *getEmptyKey() {return PtrInfo::getEmptyKey(); }

1530static InstrListElem *getTombstoneKey() {

1531return PtrInfo::getTombstoneKey();

1532 }

1533staticunsigned getHashValue(const InstrListElem *E) {

1534return IInfo::getHashValue(E->first);

1535 }

1536staticboolisEqual(const InstrListElem *A,const InstrListElem *B) {

1537if (A == getEmptyKey() ||B == getEmptyKey())

1538returnA == getEmptyKey() &&B == getEmptyKey();

1539if (A == getTombstoneKey() ||B == getTombstoneKey())

1540returnA == getTombstoneKey() &&B == getTombstoneKey();

1541return IInfo::isEqual(A->first,B->first);

1542 }

1543 };

1544SpecificBumpPtrAllocator<InstrListElem>Allocator;

1545simple_ilist<InstrListElem> MRU;

1546DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

1547

1548// Compare each instruction in `instrs` to leader of the N most recently-used

1549// chains. This limits the O(n^2) behavior of this pass while also allowing

1550// us to build arbitrarily long chains.

1551for (Instruction *I : Instrs) {

1552constexprint MaxChainsToTry = 64;

1553

1554bool MatchFound =false;

1555auto ChainIter = MRU.begin();

1556for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();

1557 ++J, ++ChainIter) {

1558if (std::optional<APInt>Offset = getConstantOffset(

1559getLoadStorePointerOperand(ChainIter->first),

1560getLoadStorePointerOperand(I),

1561/*ContextInst=*/

1562 (ChainIter->first->comesBefore(I) ?I : ChainIter->first))) {

1563// `Offset` might not have the expected number of bits, if e.g. AS has a

1564// different number of bits than opaque pointers.

1565 ChainIter->second.emplace_back(I,Offset.value());

1566// Move ChainIter to the front of the MRU list.

1567 MRU.remove(*ChainIter);

1568 MRU.push_front(*ChainIter);

1569 MatchFound =true;

1570break;

1571 }

1572 }

1573

1574if (!MatchFound) {

1575APInt ZeroOffset(ASPtrBits, 0);

1576 InstrListElem *E =new (Allocator.Allocate()) InstrListElem(I);

1577 E->second.emplace_back(I, ZeroOffset);

1578 MRU.push_front(*E);

1579 Chains.insert(E);

1580 }

1581 }

1582

1583 std::vector<Chain>Ret;

1584Ret.reserve(Chains.size());

1585// Iterate over MRU rather than Chains so the order is deterministic.

1586for (auto &E : MRU)

1587if (E.second.size() > 1)

1588Ret.emplace_back(std::move(E.second));

1589returnRet;

1590}

1591

1592std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA,Value *PtrB,

1593Instruction *ContextInst,

1594unsignedDepth) {

1595LLVM_DEBUG(dbgs() <<"LSV: getConstantOffset, PtrA=" << *PtrA

1596 <<", PtrB=" << *PtrB <<", ContextInst= " << *ContextInst

1597 <<", Depth=" <<Depth <<"\n");

1598// We'll ultimately return a value of this bit width, even if computations

1599// happen in a different width.

1600unsigned OrigBitWidth =DL.getIndexTypeSizeInBits(PtrA->getType());

1601APInt OffsetA(OrigBitWidth, 0);

1602APInt OffsetB(OrigBitWidth, 0);

1603 PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);

1604 PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

1605unsigned NewPtrBitWidth =DL.getTypeStoreSizeInBits(PtrA->getType());

1606if (NewPtrBitWidth !=DL.getTypeStoreSizeInBits(PtrB->getType()))

1607return std::nullopt;

1608

1609// If we have to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets

1610// should properly handle a possible overflow and the value should fit into

1611// the smallest data type used in the cast/gep chain.

1612assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&

1613 OffsetB.getSignificantBits() <= NewPtrBitWidth);

1614

1615 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);

1616 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);

1617if (PtrA == PtrB)

1618return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

1619

1620// Try to compute B - A.

1621constSCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));

1622if (DistScev != SE.getCouldNotCompute()) {

1623LLVM_DEBUG(dbgs() <<"LSV: SCEV PtrB - PtrA =" << *DistScev <<"\n");

1624ConstantRange DistRange = SE.getSignedRange(DistScev);

1625if (DistRange.isSingleElement()) {

1626// Handle index width (the width of Dist) != pointer width (the width of

1627// the Offset*s at this point).

1628APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);

1629return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);

1630 }

1631 }

1632if (std::optional<APInt> Diff =

1633 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst,Depth))

1634return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))

1635 .sextOrTrunc(OrigBitWidth);

1636return std::nullopt;

1637}

load

AMDGPU Mark last scratch load

Definition:AMDGPUMarkLastScratchLoad.cpp:142

APInt.h

This file implements a class to represent arbitrary precision integral constant values and operations...

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Definition:ARMSLSHardening.cpp:73

Expand Atomic instructions

Definition:AtomicExpandPass.cpp:172

isEqual

static bool isEqual(const Function &Caller, const Function &Callee)

Definition:Attributes.cpp:2469

Attributes.h

This file contains the simple types necessary to represent the attributes associated with functions a...

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Casting.h

ConstantRange.h

Constants.h

This file contains the declarations for the subclasses of Constant, which represent the different fla...

DataLayout.h

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

Debug.h

LLVM_DEBUG

#define LLVM_DEBUG(...)

Definition:Debug.h:106

DenseMap.h

This file defines the DenseMap class.

DerivedTypes.h

Dominators.h

Size

uint64_t Size

Definition:ELFObjHandler.cpp:81

End

bool End

Definition:ELF_riscv.cpp:480

GetElementPtrTypeIterator.h

Module.h This file contains the declarations for the Module class.

static bool checkNoWrapFlags(Instruction *I, bool Signed)

Definition:LoadStoreVectorizer.cpp:1096

checkIfSafeAddSequence

static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)

Definition:LoadStoreVectorizer.cpp:1102

DEBUG_TYPE

#define DEBUG_TYPE

Definition:LoadStoreVectorizer.cpp:120

LoadStoreVectorizer.h

#define F(x, y, z)

Definition:MD5.cpp:55

#define I(x, y, z)

Definition:MD5.cpp:58

MapVector.h

This file implements a map that provides insertion order iteration.

MathExtras.h

MemoryLocation.h

This file provides utility analysis objects describing memory locations.

ModRef.h

Signed

@ Signed

Definition:NVPTXISelLowering.cpp:4789

INITIALIZE_PASS_DEPENDENCY

#define INITIALIZE_PASS_DEPENDENCY(depName)

Definition:PassSupport.h:55

INITIALIZE_PASS_END

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

Definition:PassSupport.h:57

INITIALIZE_PASS_BEGIN

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Definition:PassSupport.h:52

Pass.h

PostOrderIterator.h

This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

isSafeToMove

static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To)

Check if it's safe to move From down to To, checking that no physical registers are clobbered.

Definition:RISCVVectorPeephole.cpp:497

Allocator

Basic Register Allocator

Definition:RegAllocBasic.cpp:146

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

STLExtras.h

This file contains some templates that are useful if you are working with the STL at all.

raw_pwrite_stream & OS

Definition:SampleProfWriter.cpp:51

ScalarEvolution.h

Sequence.h

Provides some synthesis utilities to produce sequences of values.

SmallPtrSet.h

This file defines the SmallPtrSet class.

SmallVector.h

This file defines the SmallVector class.

Statistic.h

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC

#define STATISTIC(VARNAME, DESC)

Definition:Statistic.h:166

Ptr

@ Ptr

Definition:TargetLibraryInfo.cpp:77

TargetTransformInfo.h

This pass exposes codegen information to IR-level passes.

Definition:ItaniumDemangle.h:1173

llvm::AAManager

A manager for alias analyses.

Definition:AliasAnalysis.h:933

llvm::AAResultsWrapperPass

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

Definition:AliasAnalysis.h:981

llvm::AAResults

Definition:AliasAnalysis.h:314

llvm::AAResults::getModRefInfo

ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)

Check whether or not an instruction may read or write the optionally specified memory location.

Definition:AliasAnalysis.h:508

llvm::APInt

Class for arbitrary precision integers.

Definition:APInt.h:78

llvm::APInt::clearBit

void clearBit(unsigned BitPosition)

Set a given bit to 0.

Definition:APInt.h:1407

llvm::APInt::zext

APInt zext(unsigned width) const

Zero extend to a new width.

Definition:APInt.cpp:986

llvm::APInt::abs

APInt abs() const

Get the absolute value.

Definition:APInt.h:1773

llvm::APInt::sgt

bool sgt(const APInt &RHS) const

Signed greater than comparison.

Definition:APInt.h:1201

llvm::APInt::getBitWidth

unsigned getBitWidth() const

Return the number of bits in the APInt.

Definition:APInt.h:1468

llvm::APInt::ult

bool ult(const APInt &RHS) const

Unsigned less than comparison.

Definition:APInt.h:1111

llvm::APInt::sle

bool sle(const APInt &RHS) const

Signed less or equal comparison.

Definition:APInt.h:1166

llvm::APInt::sextOrTrunc

APInt sextOrTrunc(unsigned width) const

Sign extend or truncate to width.

Definition:APInt.cpp:1015

llvm::APInt::getLimitedValue

uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const

If this value is smaller than the specified limit, return it, otherwise return the limit value.

Definition:APInt.h:475

llvm::APInt::sge

bool sge(const APInt &RHS) const

Signed greater or equal comparison.

Definition:APInt.h:1237

llvm::APInt::getSExtValue

int64_t getSExtValue() const

Get sign extended value.

Definition:APInt.h:1542

llvm::AnalysisManager

A container for analyses that lazily runs them and caches their results.

Definition:PassManager.h:253

llvm::AnalysisManager::getResult

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Definition:PassManager.h:410

llvm::AnalysisUsage

Represent the analysis usage information of a pass.

Definition:PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired

AnalysisUsage & addRequired()

Definition:PassAnalysisSupport.h:75

llvm::AnalysisUsage::setPreservesCFG

void setPreservesCFG()

This function should be called by the pass, iff they do not:

Definition:Pass.cpp:256

llvm::ArrayRef

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Definition:ArrayRef.h:41

llvm::ArrayRef::size

size_t size() const

size - Get the array size.

Definition:ArrayRef.h:168

llvm::ArrayRef::empty

bool empty() const

empty - Check if the array is empty.

Definition:ArrayRef.h:163

llvm::AssumptionAnalysis

A function analysis which provides an AssumptionCache.

Definition:AssumptionCache.h:173

llvm::AssumptionAnalysis::run

AssumptionCache run(Function &F, FunctionAnalysisManager &)

Definition:AssumptionCache.cpp:219

llvm::AssumptionCacheTracker

An immutable pass that tracks lazily created AssumptionCache objects.

Definition:AssumptionCache.h:204

llvm::AssumptionCache

A cache of @llvm.assume calls within a function.

Definition:AssumptionCache.h:42

llvm::BasicBlock

LLVM Basic Block Representation.

Definition:BasicBlock.h:61

llvm::BasicBlock::reverse_iterator

InstListType::reverse_iterator reverse_iterator

Definition:BasicBlock.h:179

llvm::BasicBlock::iterator

InstListType::iterator iterator

Instruction iterators...

Definition:BasicBlock.h:177

llvm::BinaryOperator

Definition:InstrTypes.h:170

llvm::CFGAnalyses

Represents analyses that only rely on functions' control flow.

Definition:Analysis.h:72

llvm::ConstantRange

This class represents a range of values.

Definition:ConstantRange.h:47

llvm::ConstantRange::getSingleElement

const APInt * getSingleElement() const

If this set contains a single element, return it, otherwise return null.

Definition:ConstantRange.h:251

llvm::ConstantRange::isSingleElement

bool isSingleElement() const

Return true if this set contains exactly one member.

Definition:ConstantRange.h:266

llvm::DataLayout

A parsed version of the target data layout string in and methods for querying it.

Definition:DataLayout.h:63

llvm::DenseMapBase::find

iterator find(const_arg_type_t< KeyT > Val)

Definition:DenseMap.h:156

llvm::DenseMapBase::end

iterator end()

Definition:DenseMap.h:84

llvm::DenseMapBase::at

const ValueT & at(const_arg_type_t< KeyT > Val) const

at - Return the entry for the specified key, or abort if no such entry exists.

Definition:DenseMap.h:202

llvm::DenseMap

Definition:DenseMap.h:727

llvm::DenseSet

Implements a dense probed hash-table based set.

Definition:DenseSet.h:278

llvm::DominatorTreeAnalysis

Analysis pass which computes a DominatorTree.

Definition:Dominators.h:279

llvm::DominatorTreeWrapperPass

Legacy analysis pass which computes a DominatorTree.

Definition:Dominators.h:317

llvm::DominatorTree

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

Definition:Dominators.h:162

llvm::FixedVectorType

Class to represent fixed width SIMD vectors.

Definition:DerivedTypes.h:563

llvm::FixedVectorType::get

static FixedVectorType * get(Type *ElementType, unsigned NumElts)

Definition:Type.cpp:791

llvm::FunctionPass

FunctionPass class - This class is used to implement most global optimizations.

Definition:Pass.h:310

llvm::FunctionPass::runOnFunction

virtual bool runOnFunction(Function &F)=0

runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.

llvm::Function

Definition:Function.h:63

llvm::GlobalsAAWrapperPass

Legacy wrapper pass to provide the GlobalsAAResult object.

Definition:GlobalsModRef.h:142

llvm::IRBuilder

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

Definition:IRBuilder.h:2705

llvm::Instruction

Definition:Instruction.h:68

llvm::Instruction::hasNoUnsignedWrap

bool hasNoUnsignedWrap() const LLVM_READONLY

Determine whether the no unsigned wrap flag is set.

Definition:Instruction.cpp:403

llvm::Instruction::hasNoSignedWrap

bool hasNoSignedWrap() const LLVM_READONLY

Determine whether the no signed wrap flag is set.

Definition:Instruction.cpp:410

llvm::Instruction::hasMetadata

bool hasMetadata() const

Return true if this instruction has any metadata attached to it.

Definition:Instruction.h:404

llvm::Instruction::comesBefore

bool comesBefore(const Instruction *Other) const

Given an instruction Other in the same basic block as this instruction, return true if this instructi...

Definition:Instruction.cpp:334

llvm::Instruction::getOpcode

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

Definition:Instruction.h:310

llvm::Instruction::moveBefore

void moveBefore(Instruction *MovePos)

Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...

Definition:Instruction.cpp:175

llvm::LoadInst

An instruction for reading from memory.

Definition:Instructions.h:176

llvm::LoadInst::isSimple

bool isSimple() const

Definition:Instructions.h:247

llvm::LoadStoreVectorizerPass::run

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition:LoadStoreVectorizer.cpp:409

llvm::MapVector

This class implements a map that also provides access to all stored values in a deterministic order.

Definition:MapVector.h:36

llvm::MapVector::insert

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Definition:MapVector.h:141

llvm::MemoryLocation::get

static MemoryLocation get(const LoadInst *LI)

Return a location with information about the memory reference by the given instruction.

Definition:MemoryLocation.cpp:35

llvm::PassRegistry::getPassRegistry

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Definition:PassRegistry.cpp:24

llvm::Pass

Pass interface - Implemented by all 'passes'.

Definition:Pass.h:94

llvm::Pass::getAnalysisUsage

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...

Definition:Pass.cpp:98

llvm::Pass::getPassName

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

Definition:Pass.cpp:81

llvm::PoisonValue::get

static PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

Definition:Constants.cpp:1878

llvm::PreservedAnalyses

A set of analyses that are preserved following a run of a transformation pass.

Definition:Analysis.h:111

llvm::PreservedAnalyses::all

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Definition:Analysis.h:117

llvm::PreservedAnalyses::preserveSet

void preserveSet()

Mark an analysis set as preserved.

Definition:Analysis.h:146

llvm::SCEVAAWrapperPass

Legacy wrapper pass to provide the SCEVAAResult object.

Definition:ScalarEvolutionAliasAnalysis.h:56

llvm::SCEV

This class represents an analyzed expression in the program.

Definition:ScalarEvolution.h:71

llvm::ScalarEvolutionAnalysis

Analysis pass that exposes the ScalarEvolution for a function.

Definition:ScalarEvolution.h:2320

llvm::ScalarEvolutionWrapperPass

Definition:ScalarEvolution.h:2352

llvm::ScalarEvolution

The main scalar evolution driver.

Definition:ScalarEvolution.h:447

llvm::ScalarEvolution::getSCEV

const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

Definition:ScalarEvolution.cpp:4547

llvm::ScalarEvolution::getSignedRange

ConstantRange getSignedRange(const SCEV *S)

Determine the signed range for a particular SCEV.

Definition:ScalarEvolution.h:1013

llvm::ScalarEvolution::getMinusSCEV

const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

Definition:ScalarEvolution.cpp:4655

llvm::ScalarEvolution::getCouldNotCompute

const SCEV * getCouldNotCompute()

Definition:ScalarEvolution.cpp:4487

llvm::SmallPtrSetImpl

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

Definition:SmallPtrSet.h:363

llvm::SmallPtrSetImpl::insert

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

Definition:SmallPtrSet.h:384

llvm::SmallPtrSetImpl::contains

bool contains(ConstPtrType Ptr) const

Definition:SmallPtrSet.h:458

llvm::SmallPtrSet

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

Definition:SmallPtrSet.h:519

llvm::SmallVectorBase::empty

bool empty() const

Definition:SmallVector.h:81

llvm::SmallVectorBase::size

size_t size() const

Definition:SmallVector.h:78

llvm::SmallVectorImpl::pop_back_val

T pop_back_val()

Definition:SmallVector.h:673

llvm::SmallVectorImpl::emplace_back

reference emplace_back(ArgTypes &&... Args)

Definition:SmallVector.h:937

llvm::SmallVectorTemplateCommon::end

iterator end()

Definition:SmallVector.h:269

llvm::SmallVectorTemplateCommon::front

reference front()

Definition:SmallVector.h:299

llvm::SmallVectorTemplateCommon::begin

iterator begin()

Definition:SmallVector.h:267

llvm::SmallVector

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Definition:SmallVector.h:1196

llvm::SpecificBumpPtrAllocator

A BumpPtrAllocator that allows only elements of a specific type to be allocated.

Definition:Allocator.h:389

llvm::StringRef

StringRef - Represent a constant reference to a string, i.e.

Definition:StringRef.h:51

llvm::TargetIRAnalysis

Analysis pass providing the TargetTransformInfo.

Definition:TargetTransformInfo.h:3194

llvm::TargetTransformInfoWrapperPass

Wrapper pass for TargetTransformInfo.

Definition:TargetTransformInfo.h:3250

llvm::TargetTransformInfo

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Definition:TargetTransformInfo.h:212

llvm::TargetTransformInfo::isLegalToVectorizeLoad

bool isLegalToVectorizeLoad(LoadInst *LI) const

Definition:TargetTransformInfo.cpp:1316

llvm::TargetTransformInfo::isLegalToVectorizeStore

bool isLegalToVectorizeStore(StoreInst *SI) const

Definition:TargetTransformInfo.cpp:1320

llvm::TargetTransformInfo::getStoreVectorFactor

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const

Definition:TargetTransformInfo.cpp:1352

llvm::TargetTransformInfo::isLegalToVectorizeStoreChain

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const

Definition:TargetTransformInfo.cpp:1330

llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const

Definition:TargetTransformInfo.cpp:1312

llvm::TargetTransformInfo::getLoadVectorFactor

unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const

Definition:TargetTransformInfo.cpp:1345

llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses

bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const

Determine if the target supports unaligned memory accesses.

Definition:TargetTransformInfo.cpp:685

llvm::TargetTransformInfo::isLegalToVectorizeLoadChain

bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const

Definition:TargetTransformInfo.cpp:1324

llvm::Target

Target - Wrapper for Target specific information.

Definition:TargetRegistry.h:144

llvm::Type

The instances of the Type class are immutable: once they are created, they are never changed.

Definition:Type.h:45

llvm::Type::isVectorTy

bool isVectorTy() const

True if this is an instance of VectorType.

Definition:Type.h:270

llvm::Type::isPointerTy

bool isPointerTy() const

True if this is an instance of PointerType.

Definition:Type.h:264

llvm::Type::getIntNTy

static IntegerType * getIntNTy(LLVMContext &C, unsigned N)

llvm::Type::getScalarSizeInBits

unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

llvm::Type::isPtrOrPtrVectorTy

bool isPtrOrPtrVectorTy() const

Return true if this is a pointer type or a vector of pointer types.

Definition:Type.h:267

llvm::Type::getScalarType

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.

Definition:Type.h:355

llvm::User::getOperand

Value * getOperand(unsigned i) const

Definition:User.h:228

llvm::User::getNumOperands

unsigned getNumOperands() const

Definition:User.h:250

llvm::Value

LLVM Value Representation.

Definition:Value.h:74

llvm::Value::getType

Type * getType() const

All values are typed, get the type of this value.

Definition:Value.h:255

llvm::Value::stripAndAccumulateInBoundsConstantOffsets

const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const

This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...

Definition:Value.h:740

llvm::Value::stripPointerCasts

const Value * stripPointerCasts() const

Strip off pointer casts, all-zero GEPs and address space casts.

Definition:Value.cpp:694

llvm::VectorType::isValidElementType

static bool isValidElementType(Type *ElemTy)

Return true if the specified type is valid as a element type.

llvm::detail::DenseSetImpl::insert

std::pair< iterator, bool > insert(const ValueT &V)

Definition:DenseSet.h:213

llvm::detail::DenseSetImpl::size

size_type size() const

Definition:DenseSet.h:81

llvm::generic_gep_type_iterator

Definition:GetElementPtrTypeIterator.h:31

llvm::generic_gep_type_iterator::getSequentialElementStride

TypeSize getSequentialElementStride(const DataLayout &DL) const

Definition:GetElementPtrTypeIterator.h:154

llvm::generic_gep_type_iterator::getOperand

Value * getOperand() const

Definition:GetElementPtrTypeIterator.h:110

llvm::ilist_detail::node_parent_access::getParent

const ParentTy * getParent() const

Definition:ilist_node.h:32

llvm::ilist_node

Definition:ilist_node.h:215

llvm::raw_ostream

This class implements an extremely fast bulk output stream that can only output to a stream.

Definition:raw_ostream.h:52

llvm::simple_ilist

A simple intrusive list implementation.

Definition:simple_ilist.h:81

llvm::simple_ilist::push_front

void push_front(reference Node)

Insert a node at the front; never copies.

Definition:simple_ilist.h:150

llvm::simple_ilist::end

iterator end()

Definition:simple_ilist.h:127

llvm::simple_ilist::begin

iterator begin()

Definition:simple_ilist.h:125

llvm::simple_ilist::remove

void remove(reference N)

Remove a node by reference; never deletes.

Definition:simple_ilist.h:189

uint64_t

unsigned

iterator_range.h

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

false

Definition:StackSlotColoring.cpp:193

llvm::AMDGPU::PALMD::Key

Key

PAL metadata keys.

Definition:AMDGPUMetadata.h:487

llvm::BitmaskEnumDetail::Mask

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

Definition:BitmaskEnum.h:125

llvm::CallingConv::C

@ C

The default llvm calling convention, compatible with C.

Definition:CallingConv.h:34

llvm::CallingConv::ID

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

Definition:CallingConv.h:24

llvm::M68k::MemAddrModeKind::V

@ V

llvm::MipsISD::Ret

@ Ret

Definition:MipsISelLowering.h:117

llvm::SIEncodingFamily::SI

@ SI

Definition:SIDefines.h:36

llvm::codeview::CompileSym2Flags::EC

@ EC

llvm::dxil::PointerTypeAnalysis::run

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

Definition:PointerTypeAnalysis.cpp:191

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::Offset

@ Offset

Definition:DWP.cpp:480

llvm::min_element

auto min_element(R &&Range)

Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...

Definition:STLExtras.h:2004

llvm::getLoadStoreAddressSpace

unsigned getLoadStoreAddressSpace(const Value *I)

A helper function that returns the address space of the pointer operand of load or store instruction.

Definition:Instructions.h:5030

llvm::size

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

Definition:STLExtras.h:1697

llvm::RecursivelyDeleteTriviallyDeadInstructions

bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

Definition:Local.cpp:546

llvm::createLoadStoreVectorizerPass

Pass * createLoadStoreVectorizerPass()

Create a legacy pass manager instance of the LoadStoreVectorizer pass.

llvm::Depth

@ Depth

Definition:SIMachineScheduler.h:36

llvm::getLoadStorePointerOperand

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

Definition:Instructions.h:4984

llvm::make_range

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

Definition:iterator_range.h:77

llvm::getUnderlyingObject

const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)

This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....

Definition:ValueTracking.cpp:6768

llvm::post_order

iterator_range< po_iterator< T > > post_order(const T &G)

Definition:PostOrderIterator.h:197

llvm::getLoadStoreAlignment

Align getLoadStoreAlignment(const Value *I)

A helper function that returns the alignment of load or store instruction.

Definition:Instructions.h:5010

llvm::propagateMetadata

Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)

Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...

Definition:VectorUtils.cpp:942

llvm::any_of

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

Definition:STLExtras.h:1746

llvm::isPowerOf2_32

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

Definition:MathExtras.h:292

llvm::getOrEnforceKnownAlignment

Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)

Try to ensure that the alignment of V is at least PrefAlign bytes.

Definition:Local.cpp:1581

llvm::isModSet

bool isModSet(const ModRefInfo MRI)

Definition:ModRef.h:48

llvm::sort

void sort(IteratorTy Start, IteratorTy End)

Definition:STLExtras.h:1664

llvm::dbgs

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

Definition:Debug.cpp:163

llvm::isModOrRefSet

bool isModOrRefSet(const ModRefInfo MRI)

Definition:ModRef.h:42

llvm::ModRefInfo

ModRefInfo

Flags indicating whether a memory access modifies or references memory.

Definition:ModRef.h:27

llvm::TTI

TargetTransformInfo TTI

Definition:TargetTransformInfo.h:208

llvm::computeKnownBits

void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...

Definition:ValueTracking.cpp:164

llvm::max_element

auto max_element(R &&Range)

Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...

Definition:STLExtras.h:2014

llvm::operator<<

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

Definition:APFixedPoint.h:303

llvm::BitWidth

constexpr unsigned BitWidth

Definition:BitmaskEnum.h:217