1//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect 12// We choose the value we use by looking at metadata in the module itself. Note 13// that we intentionally only have one way to choose these values, because other 14// parts of LLVM (particularly, InstCombineCall) rely on being able to predict 15// the values chosen by this pass. 17// If we see an unknown string, we replace its call with 0. 19//===----------------------------------------------------------------------===// 31#include "llvm/IR/IntrinsicsNVPTX.h" 43#define NVVM_REFLECT_FUNCTION "__nvvm_reflect" 44#define NVVM_REFLECT_OCL_FUNCTION "__nvvm_reflect_ocl" 48#define DEBUG_TYPE "nvptx-reflect" 59 NVVMReflect() : NVVMReflect(0) {}
74cl::desc(
"NVVM reflection, enabled by default"));
76char NVVMReflect::ID = 0;
78"Replace occurrences of __nvvm_reflect() calls with 0/1",
false,
87assert(
F.isDeclaration() &&
"_reflect function should not have a body");
88assert(
F.getReturnType()->isIntegerTy() &&
89"_reflect's return type should be integer");
96// Go through the calls in this function. Each call to __nvvm_reflect or 97// llvm.nvvm.reflect should be a CallInst with a ConstantArray argument. 98// First validate that. If the c-string corresponding to the ConstantArray can 99// be found successfully, see if it can be found in VarMap. If so, replace the 100// uses of CallInst with the value found in VarMap. If not, replace the use 103// The IR for __nvvm_reflect calls differs between CUDA versions. 105// CUDA 6.5 and earlier uses this sequence: 106// %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8 107// (i8 addrspace(4)* getelementptr inbounds 108// ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0)) 109// %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) 111// The value returned by Sym->getOperand(0) is a Constant with a 112// ConstantDataSequential operand which can be converted to string and used 115// CUDA 7.0 does it slightly differently: 116// %reflect = call i32 @__nvvm_reflect(i8* addrspacecast 117// (i8 addrspace(1)* getelementptr inbounds 118// ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*)) 120// In this case, we get a Constant with a GlobalVariable operand and we need 121// to dig deeper to find its initializer with the string we'll use for lookup. 126Function *Callee = Call->getCalledFunction();
129 Callee->getIntrinsicID() != Intrinsic::nvvm_reflect))
132// FIXME: Improve error handling here and elsewhere in this pass. 133assert(Call->getNumOperands() == 2 &&
134"Wrong number of operands to __nvvm_reflect function");
136// In cuda 6.5 and earlier, we will have an extra constant-to-generic 137// conversion of the string. 138constValue *Str = Call->getArgOperand(0);
139if (
constCallInst *ConvCall = dyn_cast<CallInst>(Str)) {
140// FIXME: Add assertions about ConvCall. 141 Str = ConvCall->getArgOperand(0);
143// Pre opaque pointers we have a constant expression wrapping the constant 145 Str = Str->stripPointerCasts();
146assert(isa<Constant>(Str) &&
147"Format of __nvvm_reflect function not recognized");
149constValue *Operand = cast<Constant>(Str)->getOperand(0);
151// For CUDA-7.0 style __nvvm_reflect calls, we need to find the operand's 153assert(GV->hasInitializer() &&
154"Format of _reflect function not recognized");
155constConstant *Initializer = GV->getInitializer();
156 Operand = Initializer;
159assert(isa<ConstantDataSequential>(Operand) &&
160"Format of _reflect function not recognized");
161assert(cast<ConstantDataSequential>(Operand)->isCString() &&
162"Format of _reflect function not recognized");
164StringRef ReflectArg = cast<ConstantDataSequential>(Operand)->getAsString();
165 ReflectArg = ReflectArg.
substr(0, ReflectArg.
size() - 1);
168int ReflectVal = 0;
// The default value is 0 169if (ReflectArg ==
"__CUDA_FTZ") {
170// Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our 171// choice here must be kept in sync with AutoUpgrade, which uses the same 172// technique to detect whether ftz is enabled. 173if (
auto *Flag = mdconst::extract_or_null<ConstantInt>(
174F.getParent()->getModuleFlag(
"nvvm-reflect-ftz")))
175 ReflectVal = Flag->getSExtValue();
176 }
elseif (ReflectArg ==
"__CUDA_ARCH") {
180// If the immediate user is a simple comparison we want to simplify it. 181for (
User *U : Call->users())
185 Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
189// The code guarded by __nvvm_reflect may be invalid for the target machine. 190// Traverse the use-def chain, continually simplifying constant expressions 191// until we find a terminator that we can then remove. 195for (
User *U :
I->users())
199I->replaceAllUsesWith(
C);
203 }
elseif (
I->isTerminator()) {
208// Removing via isInstructionTriviallyDead may add duplicates to the ToRemove 209// array. Filter out the duplicates before starting to erase from parent. 220bool NVVMReflect::runOnFunction(
Function &
F) {
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define NVVM_REFLECT_OCL_FUNCTION
SmallVector< Instruction *, 4 > ToSimplify
#define NVVM_REFLECT_FUNCTION
static cl::opt< bool > NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden, cl::desc("NVVM reflection, enabled by default"))
SmallVector< Instruction *, 4 > ToRemove
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
A container for analyses that lazily runs them and caches their results.
This class represents a function call, abstracting a target machine's calling convention.
This is an important base class in LLVM.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
constexpr size_t size() const
size - Get the string size.
LLVM Value Representation.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
FunctionPass * createNVVMReflectPass(unsigned int SmVersion)
auto unique(Range &&R, Predicate P)
void initializeNVVMReflectPass(PassRegistry &)
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)