//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on.  It builds on
// alias analysis information, and tries to provide a lazy, caching interface
// to a common kind of alias information query.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "memdep"

STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
static cl::opt<unsigned> BlockScanLimit(
    "memdep-block-scan-limit", cl::Hidden, cl::init(100),
    cl::desc("The number of instructions to scan in a block in memory "
             "dependency analysis (default = 100)"));

static cl::opt<unsigned>
    BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
                     cl::desc("The number of blocks to scan during memory "
                              "dependency analysis (default = 200)"));

// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;

/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
///
/// If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void
RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
                     Instruction *Inst, KeyTy Val) {
  auto InstIt = ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!");
  (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}
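// Editor's illustration (not from the upstream file): every reverse map in
// this analysis maintains the same invariant that RemoveFromReverseMap relies
// on. For example, ReverseLocalDeps[DepInst] holds each query instruction
// whose cached local result currently points at DepInst; when that cached
// result is dropped or replaced, the query instruction is erased from
// DepInst's set through this helper, and the whole entry disappears once the
// set becomes empty.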
/// If the given instruction references a specific memory location, fill in Loc
/// with the details, otherwise set Loc.Ptr to null.
///
/// Returns a ModRefInfo value describing the general behavior of the
/// instruction.
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
                              const TargetLibraryInfo &TLI) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = MemoryLocation::get(LI);
      return ModRefInfo::Ref;
    }
    if (LI->getOrdering() == AtomicOrdering::Monotonic) {
      Loc = MemoryLocation::get(LI);
      return ModRefInfo::ModRef;
    }
    Loc = MemoryLocation();
    return ModRefInfo::ModRef;
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = MemoryLocation::get(SI);
      return ModRefInfo::Mod;
    }
    if (SI->getOrdering() == AtomicOrdering::Monotonic) {
      Loc = MemoryLocation::get(SI);
      return ModRefInfo::ModRef;
    }
    Loc = MemoryLocation();
    return ModRefInfo::ModRef;
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = MemoryLocation::get(V);
    return ModRefInfo::ModRef;
  }

  if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
    if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
      // calls to free() deallocate the entire structure
      Loc = MemoryLocation::getAfter(FreedOp);
      return ModRefInfo::Mod;
    }
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return ModRefInfo::Mod;
    case Intrinsic::invariant_end:
      Loc = MemoryLocation::getForArgument(II, 2, TLI);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return ModRefInfo::Mod;
    case Intrinsic::masked_load:
      Loc = MemoryLocation::getForArgument(II, 0, TLI);
      return ModRefInfo::Ref;
    case Intrinsic::masked_store:
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
      return ModRefInfo::Mod;
    default:
      break;
    }
  }

  // Otherwise, just do the coarse-grained thing that always works.
  if (Inst->mayWriteToMemory())
    return ModRefInfo::ModRef;
  if (Inst->mayReadFromMemory())
    return ModRefInfo::Ref;
  return ModRefInfo::NoModRef;
}
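// Editor's illustration (not from the upstream file): for a plain unordered
// load such as
//   %v = load i32, ptr %p
// GetLocation fills Loc with the location referenced through %p and returns
// ModRefInfo::Ref, while a plain store to %p yields the same location with
// ModRefInfo::Mod. Ordered atomics fall back to ModRefInfo::ModRef, as the
// code above shows.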
/// Private helper for finding the local dependencies of a call site.
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
    CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    BasicBlock *BB) {
  unsigned Limit = getDefaultBlockScanLimit();

  // Walk backwards through the block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;
    // Debug intrinsics don't cause dependences and should not affect Limit.
    if (isa<DbgInfoIntrinsic>(Inst))
      continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    // If this inst is a memory op, get the pointer it accessed.
    MemoryLocation Loc;
    ModRefInfo MR = GetLocation(Inst, Loc, TLI);
    if (Loc.Ptr) {
      // A simple instruction.
      if (isModOrRefSet(BatchAA.getModRefInfo(Call, Loc)))
        return MemDepResult::getClobber(Inst);
      continue;
    }

    if (auto *CallB = dyn_cast<CallBase>(Inst)) {
      // If these two calls do not interfere, look past it.
      if (isNoModRef(BatchAA.getModRefInfo(Call, CallB))) {
        // If the two calls are the same, return Inst as a Def, so that
        // Call can be found redundant and eliminated.
        if (isReadOnlyCall && !isModSet(MR) &&
            Call->isIdenticalToWhenDefined(CallB))
          return MemDepResult::getDef(Inst);

        // Otherwise if the two calls don't interact (e.g. CallB is readnone)
        // keep scanning.
        continue;
      }
      return MemDepResult::getClobber(Inst);
    }

    // If we could not obtain a pointer for the instruction and the instruction
    // touches memory then assume that this is a dependency.
    if (isModOrRefSet(MR))
      return MemDepResult::getClobber(Inst);
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
  if (QueryInst != nullptr) {
    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
      InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);

      if (InvariantGroupDependency.isDef())
        return InvariantGroupDependency;
    }
  }
  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
  if (SimpleDep.isDef())
    return SimpleDep;
  // Non-local invariant group dependency indicates there is non local Def
  // (it only returns nonLocal if it finds nonLocal def), which is better than
  // local clobber and everything else.
  if (InvariantGroupDependency.isNonLocal())
    return InvariantGroupDependency;

  assert(InvariantGroupDependency.isUnknown() &&
         "InvariantGroupDependency should be only unknown at this point");
  return SimpleDep;
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
                                                             BasicBlock *BB) {
  if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
    return MemDepResult::getUnknown();

  // Take the ptr operand after all casts and geps 0. This way we can search
  // cast graph down only.
  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();

  // It is not safe to walk the use list of a global value, because function
  // passes aren't allowed to look outside their functions.
  // FIXME: this could be fixed by filtering instructions from outside
  // of current function.
  if (isa<GlobalValue>(LoadOperand))
    return MemDepResult::getUnknown();

  Instruction *ClosestDependency = nullptr;
  // Order of instructions in uses list is unpredictable. In order to always
  // get the same result, we will look for the closest dominance.
  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
    assert(Other && "Must call it with not null instruction");
    if (Best == nullptr || DT.dominates(Best, Other))
      return Other;
    return Best;
  };

  for (const Use &Us : LoadOperand->uses()) {
    auto *U = dyn_cast<Instruction>(Us.getUser());
    if (!U || U == LI || !DT.dominates(U, LI))
      continue;

    // If we hit load/store with the same invariant.group metadata (and the
    // same pointer operand) we can assume that value pointed by pointer
    // operand didn't change.
    if ((isa<LoadInst>(U) ||
         (isa<StoreInst>(U) &&
          cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
        U->hasMetadata(LLVMContext::MD_invariant_group))
      ClosestDependency = GetClosestDependency(ClosestDependency, U);
  }

  if (!ClosestDependency)
    return MemDepResult::getUnknown();
  if (ClosestDependency->getParent() == BB)
    return MemDepResult::getDef(ClosestDependency);
  // Def(U) can't be returned here because it is non-local. If a local
  // dependency won't be found then return nonLocal counting that the
  // user will call getNonLocalPointerDependency, which will return cached
  // results.
  NonLocalDefsCache.try_emplace(
      LI, NonLocalDepResult(ClosestDependency->getParent(),
                            MemDepResult::getDef(ClosestDependency), nullptr));
  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
  return MemDepResult::getNonLocal();
}
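// Editor's illustration (not from the upstream file): given IR such as
//   store i8 42, ptr %p, !invariant.group !0
//   ...
//   %v = load i8, ptr %p, !invariant.group !0
// a query for the load can be answered as a Def of the store when both are in
// the same block, or as a cached non-local Def via NonLocalDefsCache when the
// store lives in another block, because invariant.group guarantees that the
// pointed-to value did not change between the two accesses.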
// Check if SI that may alias with MemLoc can be safely skipped. This is
// possible when SI can only must alias or no alias with MemLoc (no partial
// overlapping possible) and it writes the same value that MemLoc contains
// now (it was loaded before this store and was not modified in between).
static bool canSkipClobberingStore(const StoreInst *SI,
                                   const MemoryLocation &MemLoc,
                                   Align MemLocAlign, BatchAAResults &BatchAA,
                                   unsigned ScanLimit) {
  if (!MemLoc.Size.hasValue())
    return false;
  if (MemoryLocation::get(SI).Size != MemLoc.Size)
    return false;
  if (std::min(MemLocAlign, SI->getAlign()).value() <
      MemLoc.Size.getValue().getKnownMinValue())
    return false;

  auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
  if (!LI || LI->getParent() != SI->getParent())
    return false;
  if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
    return false;

  unsigned NumVisitedInsts = 0;
  for (const Instruction *I = LI; I != SI; I = I->getNextNonDebugInstruction())
    if (++NumVisitedInsts > ScanLimit ||
        isModSet(BatchAA.getModRefInfo(I, MemLoc)))
      return false;

  return true;
}
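// Editor's illustration (not from the upstream file): the pattern this helper
// recognizes is roughly
//   %v = load i32, ptr %p        ; value currently stored at the queried location
//   ...                          ; no intervening writes to that location
//   store i32 %v, ptr %p         ; SI stores the same value back
// Such a store cannot change the contents of MemLoc, so the backwards scan in
// getSimplePointerDependencyFrom may safely look past it.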
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  bool isInvariantLoad = false;
  Align MemLocAlign = MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());

  unsigned DefaultLimit = getDefaultBlockScanLimit();
  if (!Limit)
    Limit = &DefaultLimit;

  // We must be careful with atomic accesses, as they may allow another thread
  //   to touch this location, clobbering it. We are conservative: if the
  //   QueryInst is not a simple (non-atomic) memory access, we automatically
  //   return getClobber.
  // If it is simple, we know based on the results of
  // "Compiler testing via a theory of sound optimisations in the C11/C++11
  //   memory model" in PLDI 2013, that a non-atomic location can only be
  //   clobbered between a pair of a release and an acquire action, with no
  //   access to the location in between.
  // Here is an example for giving the general intuition behind this rule.
  // In the following code:
  //   store x 0;
  //   release action; [1]
  //   acquire action; [4]
  //   %val = load x;
  // It is unsafe to replace %val by 0 because another thread may be running:
  //   acquire action; [2]
  //   store x 42;
  //   release action; [3]
  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
  // being 42. A key property of this program however is that if either
  // 1 or 4 were missing, there would be a race between the store of 42 and
  // either the store of 0 or the load (making the whole program racy).
  // The paper mentioned above shows that the same property is respected
  // by every program that can detect any optimization of that kind: either
  // it is racy (undefined) or there is a release followed by an acquire
  // between the pair of accesses under consideration.

  // If the load is invariant, we "know" that it doesn't alias *any* write. We
  // do want to respect mustalias results since defs are useful for value
  // forwarding, but any mayalias write can be assumed to be noalias.
  // Arguably, this logic should be pushed inside AliasAnalysis itself.
  if (isLoad && QueryInst)
    if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
      if (LI->hasMetadata(LLVMContext::MD_invariant_load))
        isInvariantLoad = true;
      MemLocAlign = LI->getAlign();
    }

  // True for volatile instruction.
  // For Load/Store return true if atomic ordering is stronger than AO,
  // for other instruction just true if it can read or write to memory.
  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
    if (I->isVolatile())
      return true;
    if (auto *LI = dyn_cast<LoadInst>(I))
      return isStrongerThan(LI->getOrdering(), AO);
    if (auto *SI = dyn_cast<StoreInst>(I))
      return isStrongerThan(SI->getOrdering(), AO);
    return I->mayReadOrWriteMemory();
  };
  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II))
        continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --*Limit;
    if (!*Limit)
      return MemDepResult::getClobber(Inst);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
      Intrinsic::ID ID = II->getIntrinsicID();
      switch (ID) {
      case Intrinsic::lifetime_start: {
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them. It'd
        // be nice to handle that at some point (the right approach is to use
        // GetPointerBaseWithConstantOffset).
        MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
        if (BatchAA.isMustAlias(ArgLoc, MemLoc))
          return MemDepResult::getDef(II);
        continue;
      }
      case Intrinsic::masked_load:
      case Intrinsic::masked_store: {
        MemoryLocation Loc;
        /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
        AliasResult R = BatchAA.alias(Loc, MemLoc);
        if (R == AliasResult::NoAlias)
          continue;
        if (R == AliasResult::MustAlias)
          return MemDepResult::getDef(II);
        if (ID == Intrinsic::masked_load)
          continue;
        return MemDepResult::getClobber(II);
      }
      }
    }

    // Values depend on loads if the pointers are must aliased. This means
    // that a load depends on another must aliased load from the same value.
    // One exception is atomic loads: a value can depend on an atomic load that
    // it does not alias with when this atomic load indicates that another
    // thread may be accessing the location.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // While volatile access cannot be eliminated, they do not have to clobber
      // non-aliasing locations, as normal accesses, for example, can be safely
      // reordered with volatile accesses.
      if (LI->isVolatile()) {
        if (!QueryInst)
          // Original QueryInst *may* be volatile.
          return MemDepResult::getClobber(LI);
        if (QueryInst->isVolatile())
          // Ordering required if QueryInst is itself volatile.
          return MemDepResult::getClobber(LI);
        // Otherwise, volatile doesn't imply any special ordering.
      }

      // Atomic loads have complications involved.
      // A Monotonic (or higher) load is OK if the query inst is itself not
      // atomic.
      // FIXME: This is overly conservative.
      if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering()))
        if (!QueryInst ||
            isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic) ||
            LI->getOrdering() != AtomicOrdering::Monotonic)
          return MemDepResult::getClobber(LI);

      MemoryLocation LoadLoc = MemoryLocation::get(LI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasResult R = BatchAA.alias(LoadLoc, MemLoc);

      if (isLoad) {
        if (R == AliasResult::NoAlias)
          continue;

        // Must aliased loads are defs of each other.
        if (R == AliasResult::MustAlias)
          return MemDepResult::getDef(Inst);

        // If we have a partial alias, then return this as a clobber for the
        // client to handle.
        if (R == AliasResult::PartialAlias && R.hasOffset()) {
          ClobberOffsets[LI] = R.getOffset();
          return MemDepResult::getClobber(Inst);
        }
        // Random may-alias loads don't depend on each other without a
        // dependence.
        continue;
      }

      // Stores don't alias loads from read-only memory.
      if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
        continue;

      // Stores depend on may/must aliased loads.
      return MemDepResult::getDef(Inst);
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores have complications involved.
      // A Monotonic store is OK if the query inst is itself not atomic.
      // FIXME: This is overly conservative.
      if (!SI->isUnordered() && SI->isAtomic()) {
        if (!QueryInst ||
            isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
          return MemDepResult::getClobber(SI);
        // Ok, if we are here the guard above guarantees that
        // QueryInst is a non-atomic or unordered load/store.
        // SI is atomic with monotonic or release semantic (seq_cst for store
        // is actually a release semantic plus total order over other seq_cst
        // instructions, as soon as QueryInst is not seq_cst we can consider it
        // as simple release semantic).
        // Monotonic and Release semantic allows re-ordering before store
        // so we are safe to go further and check the aliasing. It will prohibit
        // re-ordering in case locations are may or must alias.
      }

      // While volatile access cannot be eliminated, they do not have to clobber
      // non-aliasing locations, as normal accesses can for example be reordered
      // with volatile accesses.
      if (SI->isVolatile())
        if (!QueryInst || QueryInst->isVolatile())
          return MemDepResult::getClobber(SI);

      // If alias analysis can tell that this store is guaranteed to not modify
      // the query pointer, ignore it. Use getModRefInfo to handle cases where
      // the query pointer points to constant memory etc.
      if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
        continue;

      // Ok, this store might clobber the query pointer. Check to see if it is
      // a must alias: in this case, we want to return this as a def.
      // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
      MemoryLocation StoreLoc = MemoryLocation::get(SI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasResult R = BatchAA.alias(StoreLoc, MemLoc);

      if (R == AliasResult::NoAlias)
        continue;
      if (R == AliasResult::MustAlias)
        return MemDepResult::getDef(Inst);
      if (isInvariantLoad)
        continue;
      if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
        continue;
      return MemDepResult::getClobber(Inst);
    }

    // If this is an allocation, and if we know that the accessed pointer is to
    // the allocation, return Def. This means that there is no dependence and
    // the access can be optimized based on that. For example, a load could
    // turn into undef. Note that we can bypass the allocation itself when
    // looking for a clobber in many cases; that's an alias property and is
    // handled by BasicAA.
    if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
      const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
      if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
    }

    // If we found a select instruction for MemLoc pointer, return it as Def
    // dependency.
    if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
      return MemDepResult::getDef(Inst);

    if (isInvariantLoad)
      continue;

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads or stores 'before' the
    // fence. As a result, we look past it when finding a dependency for
    // loads. DSE uses this to find preceding stores to delete and thus we
    // can't bypass the fence if the query instruction is a store.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
        continue;

    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
    // If the call has no effect on the queried pointer, just ignore it.
    if (!isModOrRefSet(MR))
      continue;
    // If the call is known to never store to the pointer, and if this is a
    // load query, we can safely ignore it (scan past it).
    if (isLoad && !isModSet(MR))
      continue;
    // Otherwise, there is a potential dependence. Return a clobber.
    return MemDepResult::getClobber(Inst);
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
  ClobberOffsets.clear();
  Instruction *ScanPos = QueryInst;

  // Check for a cached result.
  MemDepResult &LocalCache = LocalDeps[QueryInst];

  // If the cached entry is non-dirty, just return it. Note that this depends
  // on MemDepResult's default constructing to 'dirty'.
  if (!LocalCache.isDirty())
    return LocalCache;

  // Otherwise, if we have a dirty entry, we know we can start the scan at that
  // instruction, which may save us some work.
  if (Instruction *Inst = LocalCache.getInst()) {
    ScanPos = Inst;
    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  }

  BasicBlock *QueryParent = QueryInst->getParent();

  // Do the scan.
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    // No dependence found. If this is the entry block of the function, it is
    // unknown, otherwise it is non-local.
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
      LocalCache = MemDepResult::getNonLocal();
    else
      LocalCache = MemDepResult::getNonFuncLocal();
  } else {
    MemoryLocation MemLoc;
    ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !isModSet(MR);
      if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache =
          getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
                                   QueryParent, QueryInst, nullptr);
    } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
      bool isReadOnly = AA.onlyReadsMemory(QueryCall);
      LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
                                         ScanPos->getIterator(), QueryParent);
    } else {
      // Non-memory instruction.
      LocalCache = MemDepResult::getUnknown();
    }
  }

  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}
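// Editor's illustration (not from the upstream file): a typical client query
// looks roughly like
//   MemDepResult Dep = MD.getDependency(LoadI);
//   if (Dep.isDef())         { /* value produced by Dep.getInst() */ }
//   else if (Dep.isClobber()) { /* blocked by Dep.getInst() */ }
//   else if (Dep.isNonLocal()) { /* fall back to getNonLocalPointerDependency */ }
// The result is cached in LocalDeps, and ReverseLocalDeps is updated above so
// the cache can be invalidated when the depended-on instruction is removed.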
/// This method is used when -debug is specified to verify that cache arrays
/// are properly kept sorted.
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1)
    Count = Cache.size();
  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
         "Cache isn't sorted!");
}

const MemoryDependenceResults::NonLocalDepInfo &
MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
  assert(getDependency(QueryCall).isNonLocal() &&
         "getNonLocalCallDependency should only be used on calls with "
         "non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
  NonLocalDepInfo &Cache = CacheP.first;

  // This is the set of blocks that need to be recomputed. In the cached case,
  // this can happen due to instructions being deleted etc. In the uncached
  // case, this starts out as the set of predecessors we care about.
  SmallVector<BasicBlock *, 32> DirtyBlocks;

  if (!Cache.empty()) {
    // Okay, we have a cache entry. If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      ++NumCacheNonLocal;
      return Cache;
    }

    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (auto &Entry : Cache)
      if (Entry.getResult().isDirty())
        DirtyBlocks.push_back(Entry.getBB());

    // Sort the cache so that we can do fast binary search lookups below.
    llvm::sort(Cache);

    ++NumCacheDirtyNonLocal;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    append_range(DirtyBlocks, PredCache.get(QueryCall->getParent()));
    ++NumUncacheNonLocal;
  }

  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);

  SmallPtrSet<BasicBlock *, 32> Visited;

  unsigned NumSortedEntries = Cache.size();
  LLVM_DEBUG(AssertSorted(Cache));

  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();

    // Already processed this block?
    if (!Visited.insert(DirtyBB).second)
      continue;

    // Do a binary search to see if we already have an entry for this block in
    // the cache set. If so, find it.
    NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
                         NonLocalDepEntry(DirtyBB));
    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
      --Entry;

    NonLocalDepEntry *ExistingResult = nullptr;
    if (Entry != Cache.begin() + NumSortedEntries &&
        Entry->getBB() == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->getResult().isDirty())
        continue;

      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &*Entry;
    }

    // If the dirty entry has a pointer, start scanning from it so we don't
    // have to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (Instruction *Inst =
            ExistingResult ? ExistingResult->getResult().getInst() : nullptr) {
      ScanPos = Inst->getIterator();
      // We're removing QueryInst's use of Inst.
      RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
                                          QueryCall);
    }

    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;

    if (ScanPos != DirtyBB->begin()) {
      Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
    } else {
      // No dependence found. If this is the entry block of the function, it is
      // a clobber, otherwise it is unknown.
      Dep = DirtyBB != &DirtyBB->getParent()->getEntryBlock()
                ? MemDepResult::getNonLocal()
                : MemDepResult::getNonFuncLocal();
    }

    // If we had a dirty entry for the block, update it. Otherwise, just add
    // a new entry.
    if (ExistingResult)
      ExistingResult->setResult(Dep);
    else
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));

    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (Instruction *Inst = Dep.getInst()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      ReverseNonLocalDeps[Inst].insert(QueryCall);
    } else {
      // If the block *is* completely transparent to the load, we need to check
      // the predecessors of this block. Add them to our worklist.
      append_range(DirtyBlocks, PredCache.get(DirtyBB));
    }
  }

  return Cache;
}

void MemoryDependenceResults::getNonLocalPointerDependency(
    Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  bool isLoad = isa<LoadInst>(QueryInst);
  BasicBlock *FromBB = QueryInst->getParent();
  assert(FromBB);

  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");

  // Check if there is cached Def with invariant.group.
  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
  if (NonLocalDefIt != NonLocalDefsCache.end()) {
    Result.push_back(NonLocalDefIt->second);
    ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
        .erase(QueryInst);
    NonLocalDefsCache.erase(NonLocalDefIt);
    return;
  }

  // This routine does not expect to deal with volatile instructions.
  // Doing so would require piping through the QueryInst all the way through.
  // TODO: volatiles can't be elided, but they can be reordered with other
  // non-volatile accesses.
  //
  // We currently give up on any instruction which is ordered, but we do handle
  // atomic instructions which are unordered.
  // TODO: Handle ordered instructions
  auto isOrdered = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return !LI->isUnordered();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return !SI->isUnordered();
    }
    return false;
  };

  if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
    Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                       const_cast<Value *>(Loc.Ptr)));
    return;
  }

  const DataLayout &DL = FromBB->getDataLayout();
  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);

  // This is the set of blocks we've inspected, and the pointer we consider in
  // each block. Because of critical edges, we currently bail out if querying
  // a block with multiple different pointers. This can happen during PHI
  // translation.
  DenseMap<BasicBlock *, Value *> Visited;
  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                  Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}
907BasicBlock *BB, NonLocalDepInfo *Cache,
unsigned NumSortedEntries,
910bool isInvariantLoad =
false;
912if (
LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
913 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
915// Do a binary search to see if we already have an entry for this block in 916// the cache set. If so, find it. 917 NonLocalDepInfo::iterator Entry = std::upper_bound(
919if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
923if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
924 ExistingResult = &*Entry;
926// Use cached result for invariant load only if there is no dependency for non 927// invariant load. In this case invariant load can not have any dependency as 929if (ExistingResult && isInvariantLoad &&
931 ExistingResult =
nullptr;
933// If we have a cached entry, and it is non-dirty, use it as the value for 935if (ExistingResult && !ExistingResult->
getResult().isDirty()) {
936 ++NumCacheNonLocalPtr;
940// Otherwise, we have to scan for the value. If we have a dirty cache 941// entry, start scanning from its position, otherwise we scan from the end 946"Instruction invalidated?");
947 ++NumCacheDirtyNonLocalPtr;
950// Eliminating the dirty entry from 'Cache', so update the reverse info. 951 ValueIsLoadPair CacheKey(Loc.
Ptr,
isLoad);
954 ++NumUncacheNonLocalPtr;
957// Scan the block for the dependency. 959 QueryInst,
nullptr, BatchAA);
961// Don't cache results for invariant load. 965// If we had a dirty entry for the block, update it. Otherwise, just add 972// If the block has a dependency (i.e. it isn't completely transparent to 973// the value), remember the reverse association because we just added it 978// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently 979// update MemDep when we remove instructions. 981assert(Inst &&
"Didn't depend on anything?");
982 ValueIsLoadPair CacheKey(Loc.
Ptr,
isLoad);
983 ReverseNonLocalPtrDeps[Inst].
insert(CacheKey);
987/// Sort the NonLocalDepInfo cache, given a certain number of elements in the 988/// array that are already properly ordered. 990/// This is optimized for the case when only a few entries are added. 993unsigned NumSortedEntries) {
994switch (Cache.size() - NumSortedEntries) {
996// done, no new entries. 999// Two new entries, insert the last one into place. 1002 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1003 std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1004 Cache.insert(Entry, Val);
1008// One new entry, Just insert the new value at the appropriate position. 1009if (Cache.size() != 1) {
1012 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1014 Cache.insert(Entry, Val);
1018// Added many values, do a full scale sort. 1024/// Perform a dependency query based on pointer/pointeesize starting at the end 1027/// Add any clobber/def results to the results vector and keep track of which 1028/// blocks are visited in 'Visited'. 1030/// This has special behavior for the first block queries (when SkipFirstBlock 1031/// is true). In this special case, it ignores the contents of the specified 1032/// block and starts returning dependence info for its predecessors. 1034/// This function returns true on success, or false to indicate that it could 1035/// not compute dependence information for some reason. This should be treated 1036/// as a clobber dependence on the first instruction in the predecessor block. 1037bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1043// Look up the cached info for Pointer. 1046// Set up a temporary NLPI value. If the map doesn't yet have an entry for 1047// CacheKey, this value will be inserted as the associated value. Otherwise, 1048// it'll be ignored, and we'll have to check to see if the cached size and 1049// aa tags are consistent with the current query. 1050 NonLocalPointerInfo InitialNLPI;
1051 InitialNLPI.Size = Loc.
Size;
1052 InitialNLPI.AATags = Loc.
AATags;
1054bool isInvariantLoad =
false;
1055if (
LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1056 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1058// Get the NLPI for CacheKey, inserting one into the map if it doesn't 1060 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1061 NonLocalPointerDeps.
insert(std::make_pair(CacheKey, InitialNLPI));
1062 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1064// If we already have a cache entry for this CacheKey, we may need to do some 1065// work to reconcile the cache entry and the current query. 1066// Invariant loads don't participate in caching. Thus no need to reconcile. 1067if (!isInvariantLoad && !Pair.second) {
1068if (CacheInfo->Size != Loc.
Size) {
1069// The query's Size is not equal to the cached one. Throw out the cached 1070// data and proceed with the query with the new size. 1071 CacheInfo->Pair = BBSkipFirstBlockPair();
1072 CacheInfo->Size = Loc.
Size;
1073for (
auto &Entry : CacheInfo->NonLocalDeps)
1076 CacheInfo->NonLocalDeps.clear();
1077// The cache is cleared (in the above line) so we will have lost 1078// information about blocks we have already visited. We therefore must 1079// assume that the cache information is incomplete. 1083// If the query's AATags are inconsistent with the cached one, 1084// conservatively throw out the cached data and restart the query with 1086if (CacheInfo->AATags != Loc.
AATags) {
1087if (CacheInfo->AATags) {
1088 CacheInfo->Pair = BBSkipFirstBlockPair();
1090for (
auto &Entry : CacheInfo->NonLocalDeps)
1093 CacheInfo->NonLocalDeps.clear();
1094// The cache is cleared (in the above line) so we will have lost 1095// information about blocks we have already visited. We therefore must 1096// assume that the cache information is incomplete. 1100return getNonLocalPointerDepFromBB(
1102 Visited, SkipFirstBlock, IsIncomplete);
1108// If we have valid cached information for exactly the block we are 1109// investigating, just return it with no recomputation. 1110// Don't use cached information for invariant loads since it is valid for 1111// non-invariant loads only. 1112if (!IsIncomplete && !isInvariantLoad &&
1113 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1114// We have a fully cached result for this query then we can just return the 1115// cached results and populate the visited set. However, we have to verify 1116// that we don't already have conflicting results for these blocks. Check 1117// to ensure that if a block in the results set is in the visited set that 1118// it was for the same pointer query. 1119if (!Visited.
empty()) {
1120for (
auto &Entry : *Cache) {
1123if (VI == Visited.
end() ||
VI->second ==
Pointer.getAddr())
1126// We have a pointer mismatch in a block. Just return false, saying 1127// that something was clobbered in this result. We could also do a 1128// non-fully cached query, but there is little point in doing this. 1134for (
auto &Entry : *Cache) {
1136if (
Entry.getResult().isNonLocal()) {
1145 ++NumCacheCompleteNonLocalPtr;
1149// Otherwise, either this is a new block, a block with an invalid cache 1150// pointer or one that we're about to invalidate by putting more info into 1151// it than its valid cache info. If empty and not explicitly indicated as 1152// incomplete, the result will be valid cache info, otherwise it isn't. 1154// Invariant loads don't affect cache in any way thus no need to update 1155// CacheInfo as well. 1156if (!isInvariantLoad) {
1157if (!IsIncomplete && Cache->empty())
1158 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1160 CacheInfo->Pair = BBSkipFirstBlockPair();
1166// PredList used inside loop. 1169// Keep track of the entries that we know are sorted. Previously cached 1170// entries will all be sorted. The entries we add we only sort on demand (we 1171// don't insert every element into its sorted position). We know that we 1172// won't get any reuse from currently inserted values, because we don't 1173// revisit blocks after we insert info for them. 1174unsigned NumSortedEntries = Cache->size();
1176bool GotWorklistLimit =
false;
1180while (!Worklist.
empty()) {
1183// If we do process a large number of blocks it becomes very expensive and 1184// likely it isn't worth worrying about 1186// Sort it now (if needed) so that recursive invocations of 1187// getNonLocalPointerDepFromBB and other routines that could reuse the 1188// cache value will only see properly sorted cache arrays. 1189if (Cache && NumSortedEntries != Cache->size()) {
1192// Since we bail out, the "Cache" set won't contain all of the 1193// results for the query. This is ok (we can still use it to accelerate 1194// specific block queries) but we can't do the fastpath "return all 1195// results from the set". Clear out the indicator for this. 1196 CacheInfo->Pair = BBSkipFirstBlockPair();
1200// Skip the first block if we have it. 1201if (!SkipFirstBlock) {
1202// Analyze the dependency of *Pointer in FromBB. See if we already have 1204assert(Visited.
count(BB) &&
"Should check 'visited' before adding to WL");
1206// Get the dependency info for Pointer in BB. If we have cached 1207// information, we will use it, otherwise we compute it. 1210 QueryInst, Loc,
isLoad, BB, Cache, NumSortedEntries, BatchAA);
1212// If we got a Def or Clobber, add this to the list of results. 1221// If 'Pointer' is an instruction defined in this block, then we need to do 1222// phi translation to change it into a value live in the predecessor block. 1223// If not, we just add the predecessors to the worklist and scan them with 1225if (!
Pointer.needsPHITranslationFromBlock(BB)) {
1226 SkipFirstBlock =
false;
1229// Verify that we haven't looked at this block yet. 1230 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> InsertRes =
1232if (InsertRes.second) {
1233// First time we've looked at *PI. 1238// If we have seen this block before, but it was with a different 1239// pointer then we have a phi translation failure and we have to treat 1240// this as a clobber. 1241if (InsertRes.first->second !=
Pointer.getAddr()) {
1242// Make sure to clean up the Visited map before continuing on to 1243// PredTranslationFailure. 1244for (
auto *NewBlock : NewBlocks)
1245 Visited.
erase(NewBlock);
1246goto PredTranslationFailure;
1249if (NewBlocks.
size() > WorklistEntries) {
1250// Make sure to clean up the Visited map before continuing on to 1251// PredTranslationFailure. 1252for (
auto *NewBlock : NewBlocks)
1253 Visited.
erase(NewBlock);
1254 GotWorklistLimit =
true;
1255goto PredTranslationFailure;
1257 WorklistEntries -= NewBlocks.size();
1258 Worklist.
append(NewBlocks.begin(), NewBlocks.end());
1262// We do need to do phi translation, if we know ahead of time we can't phi 1263// translate this value, don't even try. 1264if (!
Pointer.isPotentiallyPHITranslatable())
1265goto PredTranslationFailure;
1267// We may have added values to the cache list before this PHI translation. 1268// If so, we haven't done anything to ensure that the cache remains sorted. 1269// Sort it now (if needed) so that recursive invocations of 1270// getNonLocalPointerDepFromBB and other routines that could reuse the cache 1271// value will only see properly sorted cache arrays. 1272if (Cache && NumSortedEntries != Cache->size()) {
1274 NumSortedEntries = Cache->size();
1280 PredList.
push_back(std::make_pair(Pred, Pointer));
1282// Get the PHI translated pointer in this predecessor. This can fail if 1283// not translatable, in which case the getAddr() returns null. 1286 PredPointer.
translateValue(BB, Pred, &DT,
/*MustDominate=*/false);
1288// Check to see if we have already visited this pred block with another 1289// pointer. If so, we can't do this lookup. This failure can occur 1290// with PHI translation when a critical edge exists and the PHI node in 1291// the successor translates to a pointer value different than the 1292// pointer the block was first analyzed with. 1293 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> InsertRes =
1294 Visited.
insert(std::make_pair(Pred, PredPtrVal));
1296if (!InsertRes.second) {
1297// We found the pred; take it off the list of preds to visit. 1300// If the predecessor was visited with PredPtr, then we already did 1301// the analysis and can ignore it. 1302if (InsertRes.first->second == PredPtrVal)
1305// Otherwise, the block was previously analyzed with a different 1306// pointer. We can't represent the result of this case, so we just 1307// treat this as a phi translation failure. 1309// Make sure to clean up the Visited map before continuing on to 1310// PredTranslationFailure. 1311for (
constauto &Pred : PredList)
1312 Visited.
erase(Pred.first);
1314goto PredTranslationFailure;
1318// Actually process results here; this need to be a separate loop to avoid 1319// calling getNonLocalPointerDepFromBB for blocks we don't want to return 1320// any results for. (getNonLocalPointerDepFromBB will modify our 1321// datastructures in ways the code after the PredTranslationFailure label 1323for (
auto &
I : PredList) {
1328bool CanTranslate =
true;
1329// If PHI translation was unable to find an available pointer in this 1330// predecessor, then we have to assume that the pointer is clobbered in 1331// that predecessor. We can still do PRE of the load, which would insert 1332// a computation of the pointer in this predecessor. 1334 CanTranslate =
false;
1336// FIXME: it is entirely possible that PHI translating will end up with 1337// the same value. Consider PHI translating something like: 1338// X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* 1339// to recurse here, pedantically speaking. 1341// If getNonLocalPointerDepFromBB fails here, that means the cached 1342// result conflicted with the Visited list; we have to conservatively 1343// assume it is unknown, but this also does not block PRE of the load. 1345 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1347 Pred, Result, Visited)) {
1348// Add the entry to the Result list. 1352// Since we had a phi translation failure, the cache for CacheKey won't 1353// include all of the entries that we need to immediately satisfy future 1354// queries. Mark this in NonLocalPointerDeps by setting the 1355// BBSkipFirstBlockPair pointer to null. This requires reuse of the 1356// cached value to do more work but not miss the phi trans failure. 1357 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1358 NLPI.Pair = BBSkipFirstBlockPair();
1363// Refresh the CacheInfo/Cache pointer so that it isn't invalidated. 1364 CacheInfo = &NonLocalPointerDeps[CacheKey];
1365 Cache = &CacheInfo->NonLocalDeps;
1366 NumSortedEntries = Cache->size();
1368// Since we did phi translation, the "Cache" set won't contain all of the 1369// results for the query. This is ok (we can still use it to accelerate 1370// specific block queries) but we can't do the fastpath "return all 1371// results from the set" Clear out the indicator for this. 1372 CacheInfo->Pair = BBSkipFirstBlockPair();
1373 SkipFirstBlock =
false;
1376 PredTranslationFailure:
1377// The following code is "failure"; we can't produce a sane translation 1378// for the given block. It assumes that we haven't modified any of 1379// our datastructures while processing the current block. 1382// Refresh the CacheInfo/Cache pointer if it got invalidated. 1383 CacheInfo = &NonLocalPointerDeps[CacheKey];
1384 Cache = &CacheInfo->NonLocalDeps;
1385 NumSortedEntries = Cache->size();
1388// Since we failed phi translation, the "Cache" set won't contain all of the 1389// results for the query. This is ok (we can still use it to accelerate 1390// specific block queries) but we can't do the fastpath "return all 1391// results from the set". Clear out the indicator for this. 1392 CacheInfo->Pair = BBSkipFirstBlockPair();
1394// If *nothing* works, mark the pointer as unknown. 1396// If this is the magic first block, return this as a clobber of the whole 1397// incoming value. Since we can't phi translate to one of the predecessors, 1398// we have to bail out. 1402// Results of invariant loads are not cached thus no need to update cached 1404if (!isInvariantLoad) {
1409assert((GotWorklistLimit ||
I.getResult().isNonLocal() ||
1411"Should only be here with transparent block");
1419 (void)GotWorklistLimit;
1420// Go ahead and report unknown dependence. 1425// Okay, we're done now. If we added new values to the cache, re-sort it. 1431/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it. 1432void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1435// Most of the time this cache is empty. 1436if (!NonLocalDefsCache.empty()) {
1437auto it = NonLocalDefsCache.find(
P.getPointer());
1438if (it != NonLocalDefsCache.end()) {
1440 it->second.getResult().getInst(),
P.getPointer());
1441 NonLocalDefsCache.erase(it);
1444if (
auto *
I = dyn_cast<Instruction>(
P.getPointer())) {
1445auto toRemoveIt = ReverseNonLocalDefsCache.
find(
I);
1446if (toRemoveIt != ReverseNonLocalDefsCache.
end()) {
1447for (
constauto *entry : toRemoveIt->second)
1448 NonLocalDefsCache.erase(entry);
1449 ReverseNonLocalDefsCache.
erase(toRemoveIt);
1455if (It == NonLocalPointerDeps.
end())
1458// Remove all of the entries in the BB->val map. This involves removing 1459// instructions from the reverse map. 1465continue;
// Ignore non-local dep results. 1468// Eliminating the dirty entry from 'Cache', so update the reverse info. 1472// Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). 1473 NonLocalPointerDeps.
erase(It);
1477// If Ptr isn't really a pointer, just ignore it. 1478if (!
Ptr->getType()->isPointerTy())
1480// Flush store info for the pointer. 1482// Flush load info for the pointer. 1493// Walk through the Non-local dependencies, removing this one as the value 1494// for any cached queries. 1496if (NLDI != NonLocalDepsMap.
end()) {
1498for (
auto &Entry : BlockMap)
1499if (
Instruction *Inst = Entry.getResult().getInst())
1501 NonLocalDepsMap.
erase(NLDI);
1504// If we have a cached local dependence query for this instruction, remove it. 1506if (LocalDepEntry != LocalDeps.
end()) {
1507// Remove us from DepInst's reverse set now that the local dep info is gone. 1508if (
Instruction *Inst = LocalDepEntry->second.getInst())
1511// Remove this local dependency info. 1512 LocalDeps.
erase(LocalDepEntry);
1515// If we have any cached dependencies on this instruction, remove 1518// If the instruction is a pointer, remove it from both the load info and the 1521 removeCachedNonLocalPointerDependencies(
ValueIsLoadPair(RemInst,
false));
1522 removeCachedNonLocalPointerDependencies(
ValueIsLoadPair(RemInst,
true));
1524// Otherwise, if the instructions is in the map directly, it must be a load. 1526auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1527if (toRemoveIt != NonLocalDefsCache.end()) {
1528assert(isa<LoadInst>(RemInst) &&
1529"only load instructions should be added directly");
1530constInstruction *DepV = toRemoveIt->second.getResult().getInst();
1531 ReverseNonLocalDefsCache.
find(DepV)->second.erase(RemInst);
1532 NonLocalDefsCache.erase(toRemoveIt);
  // Loop over all of the things that depend on the instruction we're removing.
  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other values,
  // we need to replace its entry with a dirty version of the instruction after
  // it. If RemInst is a terminator, we use a null dirty value.
  //
  // Using a dirty version of the instruction after RemInst saves having to scan
  // the entire block to get to this point.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());

  auto ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
           "Nothing can locally depend on a terminator");

    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDepInst.
      assert(NewDirtyVal.getInst() &&
             "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(
          std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    for (Instruction *I : ReverseDepIt->second) {
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDepsMap[I];
      // The information is now dirty!
      INLD.second = true;

      for (auto &Entry : INLD.first) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  auto ReversePtrDepIt = ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
        ReversePtrDepsToAdd;

    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (auto &Entry : NLPDI) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }

      // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
      // subsequent value may invalidate the sortedness.
      llvm::sort(NLPDI);
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
          ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
  LLVM_DEBUG(verifyRemoved(RemInst));
}
/// Verify that the specified instruction does not occur in our internal data
/// structures.
///
/// This function verifies by asserting in debug builds.
void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
  for (const auto &DepKV : LocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : NonLocalPointerDeps) {
    assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
    for (const auto &Entry : DepKV.second.NonLocalDeps)
      assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (const auto &DepKV : NonLocalDepsMap) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = DepKV.second;
    for (const auto &Entry : INLD.first)
      assert(Entry.getResult().getInst() != D &&
             "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
    assert(DepKV.first != D && "Inst occurs in rev NLPD map");

    for (ValueIsLoadPair P : DepKV.second)
      assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
}
INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
                    "Memory Dependence Analysis", false, true)

bool MemoryDependenceResults::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether our analysis is preserved.
  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    // If not, give up now.
    return true;

  // Check whether the analyses we depend on became invalid for any reason.
  if (Inv.invalidate<AAManager>(F, PA) ||
      Inv.invalidate<AssumptionAnalysis>(F, PA) ||
      Inv.invalidate<DominatorTreeAnalysis>(F, PA))
    return true;

  // Otherwise this analysis result remains valid.
  return false;
}

unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
  return DefaultBlockScanLimit;
}

bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
  return false;
}
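// Editor's illustration (not from the upstream file): a legacy pass that wants
// this analysis would typically declare and fetch it roughly as
//   void getAnalysisUsage(AnalysisUsage &AU) const override {
//     AU.addRequired<MemoryDependenceWrapperPass>();
//   }
//   MemoryDependenceResults &MD =
//       getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
// while new-PM passes obtain it with AM.getResult<MemoryDependenceAnalysis>(F).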