Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains both AMDGPU target machine and the CodeGen pass builder.
11/// The AMDGPU target machine contains all of the hardware specific information
12/// needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The
13/// CodeGen pass builder handles the pass pipeline for new pass manager.
14//
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPUTargetMachine.h"
18#include "AMDGPU.h"
19#include "AMDGPUAliasAnalysis.h"
20#include "AMDGPUCtorDtorLowering.h"
21#include "AMDGPUExportClustering.h"
22#include "AMDGPUIGroupLP.h"
23#include "AMDGPUISelDAGToDAG.h"
24#include "AMDGPUMacroFusion.h"
25#include "AMDGPUOpenCLEnqueuedBlockLowering.h"
26#include "AMDGPUPerfHintAnalysis.h"
27#include "AMDGPURemoveIncompatibleFunctions.h"
28#include "AMDGPUSplitModule.h"
29#include "AMDGPUTargetObjectFile.h"
30#include "AMDGPUTargetTransformInfo.h"
31#include "AMDGPUUnifyDivergentExitNodes.h"
32#include "GCNDPPCombine.h"
33#include "GCNIterativeScheduler.h"
34#include "GCNSchedStrategy.h"
35#include "GCNVOPDUtils.h"
36#include "R600.h"
37#include "R600TargetMachine.h"
38#include "SIFixSGPRCopies.h"
39#include "SIFixVGPRCopies.h"
40#include "SIFoldOperands.h"
41#include "SILoadStoreOptimizer.h"
42#include "SILowerControlFlow.h"
43#include "SILowerSGPRSpills.h"
44#include "SILowerWWMCopies.h"
45#include "SIMachineFunctionInfo.h"
46#include "SIMachineScheduler.h"
47#include "SIOptimizeExecMasking.h"
48#include "SIOptimizeVGPRLiveRange.h"
49#include "SIPeepholeSDWA.h"
50#include "SIPreAllocateWWMRegs.h"
51#include "SIShrinkInstructions.h"
52#include "TargetInfo/AMDGPUTargetInfo.h"
53#include "Utils/AMDGPUBaseInfo.h"
54#include "llvm/Analysis/CGSCCPassManager.h"
55#include "llvm/Analysis/CallGraphSCCPass.h"
56#include "llvm/Analysis/UniformityAnalysis.h"
57#include "llvm/CodeGen/AtomicExpand.h"
58#include "llvm/CodeGen/DeadMachineInstructionElim.h"
59#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
60#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
61#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
62#include "llvm/CodeGen/GlobalISel/Legalizer.h"
63#include "llvm/CodeGen/GlobalISel/Localizer.h"
64#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
65#include "llvm/CodeGen/MIRParser/MIParser.h"
66#include "llvm/CodeGen/MachineCSE.h"
67#include "llvm/CodeGen/MachineLICM.h"
68#include "llvm/CodeGen/Passes.h"
69#include "llvm/CodeGen/RegAllocRegistry.h"
70#include "llvm/CodeGen/TargetPassConfig.h"
71#include "llvm/IR/IntrinsicsAMDGPU.h"
72#include "llvm/IR/PassManager.h"
73#include "llvm/IR/PatternMatch.h"
74#include "llvm/InitializePasses.h"
75#include "llvm/MC/TargetRegistry.h"
76#include "llvm/Passes/PassBuilder.h"
77#include "llvm/Support/FormatVariadic.h"
78#include "llvm/Transforms/HipStdPar/HipStdPar.h"
79#include "llvm/Transforms/IPO.h"
80#include "llvm/Transforms/IPO/AlwaysInliner.h"
81#include "llvm/Transforms/IPO/ExpandVariadics.h"
82#include "llvm/Transforms/IPO/GlobalDCE.h"
83#include "llvm/Transforms/IPO/Internalize.h"
84#include "llvm/Transforms/Scalar.h"
85#include "llvm/Transforms/Scalar/EarlyCSE.h"
86#include "llvm/Transforms/Scalar/FlattenCFG.h"
87#include "llvm/Transforms/Scalar/GVN.h"
88#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
89#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
90#include "llvm/Transforms/Scalar/NaryReassociate.h"
91#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
92#include "llvm/Transforms/Scalar/Sink.h"
93#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
94#include "llvm/Transforms/Scalar/StructurizeCFG.h"
95#include "llvm/Transforms/Utils.h"
96#include "llvm/Transforms/Utils/FixIrreducible.h"
97#include "llvm/Transforms/Utils/LCSSA.h"
98#include "llvm/Transforms/Utils/LowerSwitch.h"
99#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
100#include "llvm/Transforms/Utils/UnifyLoopExits.h"
101#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
102#include <optional>
103
// Bring the LLVM and IR pattern-match helpers into scope for the whole file.
using namespace llvm;
using namespace llvm::PatternMatch;
106
namespace {
// Three independent RegisterRegAllocBase registries so the SGPR, VGPR and WWM
// allocation phases can each be overridden separately on the command line
// (see the -sgpr-regalloc / -vgpr-regalloc / -wwm-regalloc options below).
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
public:
  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};

class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
public:
  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};

class WWMRegisterRegAlloc : public RegisterRegAllocBase<WWMRegisterRegAlloc> {
public:
  WWMRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};

// Allocation filter: only virtual registers whose class is an SGPR class.
static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
                              const MachineRegisterInfo &MRI,
                              const Register Reg) {
  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}

// Allocation filter: every virtual register that is *not* SGPR-class.
static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
                              const MachineRegisterInfo &MRI,
                              const Register Reg) {
  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}

// Allocation filter: non-SGPR virtual registers that are additionally flagged
// as whole-wave-mode (WWM) registers on the machine function.
static bool onlyAllocateWWMRegs(const TargetRegisterInfo &TRI,
                                const MachineRegisterInfo &MRI,
                                const Register Reg) {
  const SIMachineFunctionInfo *MFI =
      MRI.getMF().getInfo<SIMachineFunctionInfo>();
  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&
         MFI->checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
}

/// -{sgpr|wwm|vgpr}-regalloc=... command line option.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }

/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;

static SGPRRegisterRegAlloc
defaultSGPRRegAlloc("default",
                    "pick SGPR register allocator based on -O option",
                    useDefaultRegisterAllocator);

static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
               RegisterPassParser<SGPRRegisterRegAlloc>>
SGPRRegAlloc("sgpr-regalloc", cl::Hidden,
             cl::init(&useDefaultRegisterAllocator),
             cl::desc("Register allocator to use for SGPRs"));

static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
               RegisterPassParser<VGPRRegisterRegAlloc>>
VGPRRegAlloc("vgpr-regalloc", cl::Hidden,
             cl::init(&useDefaultRegisterAllocator),
             cl::desc("Register allocator to use for VGPRs"));

static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
               RegisterPassParser<WWMRegisterRegAlloc>>
    WWMRegAlloc("wwm-regalloc", cl::Hidden,
                cl::init(&useDefaultRegisterAllocator),
                cl::desc("Register allocator to use for WWM registers"));

// The initializeDefault*RegisterAllocatorOnce helpers run under their
// respective once_flags: if no command-line override installed a default
// factory yet (getDefault() returned null), install the parsed cl::opt value.
static void initializeDefaultSGPRRegisterAllocatorOnce() {
  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();

  if (!Ctor) {
    Ctor = SGPRRegAlloc;
    SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
  }
}

static void initializeDefaultVGPRRegisterAllocatorOnce() {
  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();

  if (!Ctor) {
    Ctor = VGPRRegAlloc;
    VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
  }
}

static void initializeDefaultWWMRegisterAllocatorOnce() {
  RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();

  if (!Ctor) {
    Ctor = WWMRegAlloc;
    WWMRegisterRegAlloc::setDefault(WWMRegAlloc);
  }
}

// Factories pairing each generic allocator (basic/greedy/fast) with the
// matching register-class filter above.
static FunctionPass *createBasicSGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateSGPRs);
}

static FunctionPass *createGreedySGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
}

static FunctionPass *createFastSGPRRegisterAllocator() {
  // false: do not clear virtual registers; VGPR allocation still follows.
  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
}

static FunctionPass *createBasicVGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateVGPRs);
}

static FunctionPass *createGreedyVGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
}

static FunctionPass *createFastVGPRRegisterAllocator() {
  // true: VGPRs are the last allocation phase, so clear virtual registers.
  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
}

static FunctionPass *createBasicWWMRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateWWMRegs);
}

static FunctionPass *createGreedyWWMRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateWWMRegs);
}

static FunctionPass *createFastWWMRegisterAllocator() {
  return createFastRegisterAllocator(onlyAllocateWWMRegs, false);
}

// Register the basic/greedy/fast choices with each of the three registries.
static SGPRRegisterRegAlloc basicRegAllocSGPR(
    "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
    "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);

static SGPRRegisterRegAlloc fastRegAllocSGPR(
    "fast", "fast register allocator", createFastSGPRRegisterAllocator);


static VGPRRegisterRegAlloc basicRegAllocVGPR(
    "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
static VGPRRegisterRegAlloc greedyRegAllocVGPR(
    "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);

static VGPRRegisterRegAlloc fastRegAllocVGPR(
    "fast", "fast register allocator", createFastVGPRRegisterAllocator);
static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",
                                               "basic register allocator",
                                               createBasicWWMRegisterAllocator);
static WWMRegisterRegAlloc
    greedyRegAllocWWMReg("greedy", "greedy register allocator",
                         createGreedyWWMRegisterAllocator);
static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",
                                              createFastWWMRegisterAllocator);

// True when this compile is the pre-link stage of a (thin or full) LTO build.
static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
  return Phase == ThinOrFullLTOPhase::FullLTOPreLink ||
         Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
}
} // anonymous namespace
273
// Run early if-conversion (off by default).
static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(false));

// Pre-RA exec mask optimizations (on by default).
static cl::opt<bool>
OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
                 cl::desc("Run pre-RA exec mask optimizations"),
                 cl::init(true));

// Lower global ctors/dtors to device globals.
static cl::opt<bool>
LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
              cl::desc("Lower GPU ctor / dtors to globals on the device."),
              cl::init(true), cl::Hidden);

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
    "amdgpu-load-store-vectorizer",
    cl::desc("Enable load store vectorizer"),
    cl::init(true),
    cl::Hidden);

// Option to control global loads scalarization
static cl::opt<bool> ScalarizeGlobal(
    "amdgpu-scalarize-global-loads",
    cl::desc("Enable global load scalarization"),
    cl::init(true),
    cl::Hidden);

// Option to run internalize pass.
static cl::opt<bool> InternalizeSymbols(
    "amdgpu-internalize-symbols",
    cl::desc("Enable elimination of non-kernel functions and unused globals"),
    cl::init(false),
    cl::Hidden);

// Option to inline all early.
static cl::opt<bool> EarlyInlineAll(
    "amdgpu-early-inline-all",
    cl::desc("Inline all functions early"),
    cl::init(false),
    cl::Hidden);
317staticcl::opt<bool>RemoveIncompatibleFunctions(
318"amdgpu-enable-remove-incompatible-functions",cl::Hidden,
319cl::desc("Enable removal of functions when they"
320"use features not supported by the target GPU"),
321cl::init(true));
322
// Sub-dword-addressing (SDWA) peephole combiner.
static cl::opt<bool> EnableSDWAPeephole(
    "amdgpu-sdwa-peephole",
    cl::desc("Enable SDWA peepholer"),
    cl::init(true));

// Data-parallel-primitive (DPP) combiner.
static cl::opt<bool> EnableDPPCombine(
    "amdgpu-dpp-combine",
    cl::desc("Enable DPP combiner"),
    cl::init(true));

// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
                                               cl::desc("Enable AMDGPU Alias Analysis"),
                                               cl::init(true));

// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
    "amdgpu-simplify-libcall",
    cl::desc("Enable amdgpu library simplifications"),
    cl::init(true),
    cl::Hidden);

// Lower kernel argument loads at the IR level.
static cl::opt<bool> EnableLowerKernelArguments(
    "amdgpu-ir-lower-kernel-arguments",
    cl::desc("Lower kernel argument loads in IR pass"),
    cl::init(true),
    cl::Hidden);

// Post-RA register reassignment optimizations (gfx10+).
static cl::opt<bool> EnableRegReassign(
    "amdgpu-reassign-regs",
    cl::desc("Enable register reassign optimizations on gfx10+"),
    cl::init(true),
    cl::Hidden);

// VGPR live range optimization for if-else control flow.
static cl::opt<bool> OptVGPRLiveRange(
    "amdgpu-opt-vgpr-liverange",
    cl::desc("Enable VGPR liverange optimizations for if-else structure"),
    cl::init(true), cl::Hidden);

// Strategy used by the atomic optimizer's scan: DPP, Iterative, or disabled.
static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
    "amdgpu-atomic-optimizer-strategy",
    cl::desc("Select DPP or Iterative strategy for scan"),
    cl::init(ScanOptions::Iterative),
    cl::values(
        clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"),
        clEnumValN(ScanOptions::Iterative, "Iterative",
                   "Use Iterative approach for scan"),
        clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));

// Enable Mode register optimization
static cl::opt<bool> EnableSIModeRegisterPass(
    "amdgpu-mode-register",
    cl::desc("Enable mode register pass"),
    cl::init(true),
    cl::Hidden);

// Enable GFX11+ s_delay_alu insertion
static cl::opt<bool>
    EnableInsertDelayAlu("amdgpu-enable-delay-alu",
                         cl::desc("Enable s_delay_alu insertion"),
                         cl::init(true), cl::Hidden);

// Enable GFX11+ VOPD
static cl::opt<bool>
    EnableVOPD("amdgpu-enable-vopd",
               cl::desc("Enable VOPD, dual issue of VALU in wave32"),
               cl::init(true), cl::Hidden);

// Option is used in lit tests to prevent deadcoding of patterns inspected.
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
              cl::init(true), cl::Hidden,
              cl::desc("Enable machine DCE inside regalloc"));

// Adjust wave priority (off by default).
static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
                                           cl::desc("Adjust wave priority"),
                                           cl::init(false), cl::Hidden);

// Master switch for the scalar IR pass group.
static cl::opt<bool> EnableScalarIRPasses(
    "amdgpu-scalar-ir-passes",
    cl::desc("Enable scalar IR passes"),
    cl::init(true),
    cl::Hidden);

// Software lowering of LDS to global memory (with ASan instrumentation).
static cl::opt<bool>
    EnableSwLowerLDS("amdgpu-enable-sw-lower-lds",
                     cl::desc("Enable lowering of lds to global memory pass "
                              "and asan instrument resulting IR."),
                     cl::init(true), cl::Hidden);

// Writes through cl::location into AMDGPUTargetMachine::EnableLowerModuleLDS.
static cl::opt<bool, true> EnableLowerModuleLDS(
    "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
    cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
    cl::Hidden);

// Pre-RA optimization pass toggle.
static cl::opt<bool> EnablePreRAOptimizations(
    "amdgpu-enable-pre-ra-optimizations",
    cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
    cl::Hidden);

// Promote flat kernel pointer arguments to the global address space.
static cl::opt<bool> EnablePromoteKernelArguments(
    "amdgpu-enable-promote-kernel-arguments",
    cl::desc("Enable promotion of flat kernel pointer arguments to global"),
    cl::Hidden, cl::init(true));

// Image intrinsic optimizer pass toggle.
static cl::opt<bool> EnableImageIntrinsicOptimizer(
    "amdgpu-enable-image-intrinsic-optimizer",
    cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
    cl::Hidden);

// Loop data prefetch (off by default).
static cl::opt<bool>
    EnableLoopPrefetch("amdgpu-loop-prefetch",
                       cl::desc("Enable loop data prefetch on AMDGPU"),
                       cl::Hidden, cl::init(false));

// Select a custom scheduling strategy by name (empty = default).
static cl::opt<std::string>
    AMDGPUSchedStrategy("amdgpu-sched-strategy",
                        cl::desc("Select custom AMDGPU scheduling strategy."),
                        cl::Hidden, cl::init(""));

// Rewrite partial register uses pass toggle.
static cl::opt<bool> EnableRewritePartialRegUses(
    "amdgpu-enable-rewrite-partial-reg-uses",
    cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
    cl::Hidden);

// HIP Standard Parallelism offload support (off by default).
static cl::opt<bool> EnableHipStdPar(
    "amdgpu-enable-hipstdpar",
    cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
    cl::Hidden);

// Run the AMDGPU attributor pass.
static cl::opt<bool>
    EnableAMDGPUAttributor("amdgpu-attributor-enable",
                           cl::desc("Enable AMDGPUAttributorPass"),
                           cl::init(true), cl::Hidden);

// Use the new GlobalISel register bank select pipeline (off by default).
static cl::opt<bool> NewRegBankSelect(
    "new-reg-bank-select",
    cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of "
             "regbankselect"),
    cl::init(false), cl::Hidden);

// Closed-world assumption at link time (off by default).
static cl::opt<bool> HasClosedWorldAssumption(
    "amdgpu-link-time-closed-world",
    cl::desc("Whether has closed-world assumption at link time"),
    cl::init(false), cl::Hidden);
468
// Target-registry entry point: registers the R600 and GCN target machines and
// initializes all AMDGPU-specific legacy passes with the global PassRegistry.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeR600ClauseMergePassPass(*PR);
  initializeR600ControlFlowFinalizerPass(*PR);
  initializeR600PacketizerPass(*PR);
  initializeR600ExpandSpecialInstrsPassPass(*PR);
  initializeR600VectorRegMergerPass(*PR);
  initializeGlobalISel(*PR);
  initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
  initializeGCNDPPCombineLegacyPass(*PR);
  initializeSILowerI1CopiesLegacyPass(*PR);
  initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
  initializeAMDGPURegBankSelectPass(*PR);
  initializeAMDGPURegBankLegalizePass(*PR);
  initializeSILowerWWMCopiesLegacyPass(*PR);
  initializeAMDGPUMarkLastScratchLoadPass(*PR);
  initializeSILowerSGPRSpillsLegacyPass(*PR);
  initializeSIFixSGPRCopiesLegacyPass(*PR);
  initializeSIFixVGPRCopiesLegacyPass(*PR);
  initializeSIFoldOperandsLegacyPass(*PR);
  initializeSIPeepholeSDWALegacyPass(*PR);
  initializeSIShrinkInstructionsLegacyPass(*PR);
  initializeSIOptimizeExecMaskingPreRAPass(*PR);
  initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
  initializeSILoadStoreOptimizerLegacyPass(*PR);
  initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
  initializeAMDGPUAlwaysInlinePass(*PR);
  initializeAMDGPUSwLowerLDSLegacyPass(*PR);
  initializeAMDGPUAttributorLegacyPass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR);
  initializeAMDGPUArgumentUsageInfoPass(*PR);
  initializeAMDGPUAtomicOptimizerPass(*PR);
  initializeAMDGPULowerKernelArgumentsPass(*PR);
  initializeAMDGPUPromoteKernelArgumentsPass(*PR);
  initializeAMDGPULowerKernelAttributesPass(*PR);
  initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(*PR);
  initializeAMDGPUPostLegalizerCombinerPass(*PR);
  initializeAMDGPUPreLegalizerCombinerPass(*PR);
  initializeAMDGPURegBankCombinerPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUPromoteAllocaToVectorPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeAMDGPULateCodeGenPrepareLegacyPass(*PR);
  initializeAMDGPURemoveIncompatibleFunctionsLegacyPass(*PR);
  initializeAMDGPULowerModuleLDSLegacyPass(*PR);
  initializeAMDGPULowerBufferFatPointersPass(*PR);
  initializeAMDGPUReserveWWMRegsPass(*PR);
  initializeAMDGPURewriteOutArgumentsPass(*PR);
  initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
  initializeAMDGPUUnifyMetadataPass(*PR);
  initializeSIAnnotateControlFlowLegacyPass(*PR);
  initializeAMDGPUInsertDelayAluPass(*PR);
  initializeSIInsertHardClausesPass(*PR);
  initializeSIInsertWaitcntsPass(*PR);
  initializeSIModeRegisterPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowLegacyPass(*PR);
  initializeSIPreEmitPeepholePass(*PR);
  initializeSILateBranchLoweringPass(*PR);
  initializeSIMemoryLegalizerPass(*PR);
  initializeSIOptimizeExecMaskingLegacyPass(*PR);
  initializeSIPreAllocateWWMRegsLegacyPass(*PR);
  initializeSIFormMemoryClausesPass(*PR);
  initializeSIPostRABundlerPass(*PR);
  initializeGCNCreateVOPDPass(*PR);
  initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
  initializeAMDGPUAAWrapperPassPass(*PR);
  initializeAMDGPUExternalAAWrapperPass(*PR);
  initializeAMDGPUImageIntrinsicOptimizerPass(*PR);
  initializeAMDGPUPrintfRuntimeBindingPass(*PR);
  initializeAMDGPUResourceUsageAnalysisPass(*PR);
  initializeGCNNSAReassignPass(*PR);
  initializeGCNPreRAOptimizationsPass(*PR);
  initializeGCNPreRALongBranchRegPass(*PR);
  initializeGCNRewritePartialRegUsesPass(*PR);
  initializeGCNRegPressurePrinterPass(*PR);
  initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
}
552
553static std::unique_ptr<TargetLoweringObjectFile>createTLOF(constTriple &TT) {
554return std::make_unique<AMDGPUTargetObjectFile>();
555}
556
557staticScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
558returnnewSIScheduleDAGMI(C);
559}
560
561staticScheduleDAGInstrs *
562createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
563constGCNSubtarget &ST =C->MF->getSubtarget<GCNSubtarget>();
564ScheduleDAGMILive *DAG =
565newGCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
566 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
567if (ST.shouldClusterStores())
568 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
569 DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
570 DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
571 DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
572return DAG;
573}
574
575staticScheduleDAGInstrs *
576createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
577ScheduleDAGMILive *DAG =
578newGCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
579 DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
580return DAG;
581}
582
583staticScheduleDAGInstrs *
584createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C) {
585constGCNSubtarget &ST =C->MF->getSubtarget<GCNSubtarget>();
586ScheduleDAGMILive *DAG =newGCNScheduleDAGMILive(
587C, std::make_unique<GCNMaxMemoryClauseSchedStrategy>(C));
588 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
589if (ST.shouldClusterStores())
590 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
591 DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
592return DAG;
593}
594
595staticScheduleDAGInstrs *
596createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
597constGCNSubtarget &ST =C->MF->getSubtarget<GCNSubtarget>();
598auto *DAG =newGCNIterativeScheduler(
599C,GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
600 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
601if (ST.shouldClusterStores())
602 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
603return DAG;
604}
605
606staticScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
607returnnewGCNIterativeScheduler(C,
608GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
609}
610
611staticScheduleDAGInstrs *
612createIterativeILPMachineScheduler(MachineSchedContext *C) {
613constGCNSubtarget &ST =C->MF->getSubtarget<GCNSubtarget>();
614auto *DAG =newGCNIterativeScheduler(C,GCNIterativeScheduler::SCHEDULE_ILP);
615 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
616if (ST.shouldClusterStores())
617 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
618 DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
619return DAG;
620}
621
// Register the custom AMDGPU machine schedulers so they can be selected with
// -misched=<name>.
static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp",
                       createGCNMaxILPMachineScheduler);

static MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry(
    "gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause",
    createGCNMaxMemoryClauseMachineScheduler);

static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry(
    "gcn-iterative-max-occupancy-experimental",
    "Run GCN scheduler to maximize occupancy (experimental)",
    createIterativeGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry GCNMinRegSchedRegistry(
    "gcn-iterative-minreg",
    "Run GCN iterative scheduler for minimal register usage (experimental)",
    createMinRegScheduler);

static MachineSchedRegistry GCNILPSchedRegistry(
    "gcn-iterative-ilp",
    "Run GCN iterative scheduler for ILP scheduling (experimental)",
    createIterativeILPMachineScheduler);
653
// Return the data-layout string for the given target triple: R600 uses 32-bit
// pointers everywhere, GCN (amdgcn) uses the full AMDGPU address-space set.
static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat. 160-bit non-integral fat buffer pointers that include a 128-bit
  // buffer descriptor and a 32-bit offset, which are indexed by 32-bit values
  // (address space 7), and 128-bit non-integral buffer resources (address
  // space 8) which cannot be non-trivially accessed by LLVM memory operations
  // like getelementptr.
  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
         "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
         "v32:32-v48:64-v96:"
         "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
         "G1-ni:7:8:9";
}
673
674LLVM_READNONE
675staticStringRefgetGPUOrDefault(constTriple &TT,StringRef GPU) {
676if (!GPU.empty())
677return GPU;
678
679// Need to default to a target with flat support for HSA.
680if (TT.getArch() ==Triple::amdgcn)
681return TT.getOS() ==Triple::AMDHSA ?"generic-hsa" :"generic";
682
683return"r600";
684}
685
686staticReloc::ModelgetEffectiveRelocModel(std::optional<Reloc::Model> RM) {
687// The AMDGPU toolchain only supports generating shared objects, so we
688// must always use PIC.
689returnReloc::PIC_;
690}
691
// Construct the common AMDGPU target machine. The base class receives the
// triple-derived data layout, the GPU name (or a default), and a relocation
// model forced to PIC. For amdgcn, the MC register info is rebuilt with the
// DWARF flavour matching the subtarget's wavefront size.
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         const TargetOptions &Options,
                                         std::optional<Reloc::Model> RM,
                                         std::optional<CodeModel::Model> CM,
                                         CodeGenOptLevel OptLevel)
    : CodeGenTargetMachineImpl(
          T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options,
          getEffectiveRelocModel(RM),
          getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
      TLOF(createTLOF(getTargetTriple())) {
  initAsmInfo();
  if (TT.getArch() == Triple::amdgcn) {
    if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
      MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
    else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
      MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
  }
}
711
// Definitions of the static configuration flags declared on
// AMDGPUTargetMachine. EnableLowerModuleLDS is exposed through a cl::location
// command-line option elsewhere in this file.
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
716
717StringRefAMDGPUTargetMachine::getGPUName(constFunction &F) const{
718Attribute GPUAttr =F.getFnAttribute("target-cpu");
719return GPUAttr.isValid() ? GPUAttr.getValueAsString() :getTargetCPU();
720}
721
722StringRefAMDGPUTargetMachine::getFeatureString(constFunction &F) const{
723Attribute FSAttr =F.getFnAttribute("target-features");
724
725return FSAttr.isValid() ? FSAttr.getValueAsString()
726 :getTargetFeatureString();
727}
728
729/// Predicate for Internalize pass.
730staticboolmustPreserveGV(constGlobalValue &GV) {
731if (constFunction *F = dyn_cast<Function>(&GV))
732returnF->isDeclaration() ||F->getName().starts_with("__asan_") ||
733F->getName().starts_with("__sanitizer_") ||
734AMDGPU::isEntryFunctionCC(F->getCallingConv());
735
736 GV.removeDeadConstantUsers();
737return !GV.use_empty();
738}
739
// Register the AMDGPU address-space alias analysis as a default function-level
// AA with the new pass manager's AAManager.
void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
  AAM.registerFunctionAnalysis<AMDGPUAA>();
}
743
744staticExpected<ScanOptions>
745parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {
746if (Params.empty())
747returnScanOptions::Iterative;
748 Params.consume_front("strategy=");
749auto Result =StringSwitch<std::optional<ScanOptions>>(Params)
750 .Case("dpp",ScanOptions::DPP)
751 .Cases("iterative","",ScanOptions::Iterative)
752 .Case("none",ScanOptions::None)
753 .Default(std::nullopt);
754if (Result)
755return *Result;
756return make_error<StringError>("invalid parameter",inconvertibleErrorCode());
757}
758
759Expected<AMDGPUAttributorOptions>
760parseAMDGPUAttributorPassOptions(StringRef Params) {
761AMDGPUAttributorOptions Result;
762while (!Params.empty()) {
763StringRef ParamName;
764 std::tie(ParamName, Params) = Params.split(';');
765if (ParamName =="closed-world") {
766 Result.IsClosedWorld =true;
767 }else {
768return make_error<StringError>(
769formatv("invalid AMDGPUAttributor pass parameter '{0}' ", ParamName)
770 .str(),
771inconvertibleErrorCode());
772 }
773 }
774return Result;
775}
776
// Hook the AMDGPU-specific passes into the new-pass-manager pipeline via
// PassBuilder extension points. Also registers the target pass registry
// (AMDGPUPassRegistry.def) and the register-class filter names used by the
// per-class regalloc options.
void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
#include "llvm/Passes/TargetPassRegistry.inc"

  // Optional HIP StdPar accelerator-code selection at pipeline start.
  PB.registerPipelineStartEPCallback(
      [](ModulePassManager &PM, OptimizationLevel Level) {
        if (EnableHipStdPar)
          PM.addPass(HipStdParAcceleratorCodeSelectionPass());
      });

  // Early module simplification: printf binding always; metadata unification,
  // optional internalization + DCE, and optional forced inlining above -O0.
  PB.registerPipelineEarlySimplificationEPCallback(
      [](ModulePassManager &PM, OptimizationLevel Level,
         ThinOrFullLTOPhase Phase) {
        PM.addPass(AMDGPUPrintfRuntimeBindingPass());

        if (Level == OptimizationLevel::O0)
          return;

        PM.addPass(AMDGPUUnifyMetadataPass());

        // We don't want to run internalization at per-module stage.
        if (InternalizeSymbols && !isLTOPreLink(Phase)) {
          PM.addPass(InternalizePass(mustPreserveGV));
          PM.addPass(GlobalDCEPass());
        }

        if (EarlyInlineAll && !EnableFunctionCalls)
          PM.addPass(AMDGPUAlwaysInlinePass());
      });

  // Peephole extension point: native call substitution and optional library
  // call simplification, skipped at -O0.
  PB.registerPeepholeEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel Level) {
        if (Level == OptimizationLevel::O0)
          return;

        FPM.addPass(AMDGPUUseNativeCallsPass());
        if (EnableLibCallSimplify)
          FPM.addPass(AMDGPUSimplifyLibCallsPass());
      });

  PB.registerCGSCCOptimizerLateEPCallback(
      [this](CGSCCPassManager &PM, OptimizationLevel Level) {
        if (Level == OptimizationLevel::O0)
          return;

        FunctionPassManager FPM;

        // Add promote kernel arguments pass to the opt pipeline right before
        // infer address spaces which is needed to do actual address space
        // rewriting.
        if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
            EnablePromoteKernelArguments)
          FPM.addPass(AMDGPUPromoteKernelArgumentsPass());

        // Add infer address spaces pass to the opt pipeline after inlining
        // but before SROA to increase SROA opportunities.
        FPM.addPass(InferAddressSpacesPass());

        // This should run after inlining to have any chance of doing
        // anything, and before other cleanup optimizations.
        FPM.addPass(AMDGPULowerKernelAttributesPass());

        if (Level != OptimizationLevel::O0) {
          // Promote alloca to vector before SROA and loop unroll. If we
          // manage to eliminate allocas before unroll we may choose to unroll
          // less.
          FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
        }

        PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
      });

  // FIXME: Why is AMDGPUAttributor not in CGSCC?
  PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM,
                                            OptimizationLevel Level,
                                            ThinOrFullLTOPhase Phase) {
    if (Level != OptimizationLevel::O0) {
      if (!isLTOPreLink(Phase))
        MPM.addPass(AMDGPUAttributorPass(*this));
    }
  });

  PB.registerFullLinkTimeOptimizationLastEPCallback(
      [this](ModulePassManager &PM, OptimizationLevel Level) {
        // We want to support the -lto-partitions=N option as "best effort".
        // For that, we need to lower LDS earlier in the pipeline before the
        // module is partitioned for codegen.
        if (EnableSwLowerLDS)
          PM.addPass(AMDGPUSwLowerLDSPass(*this));
        if (EnableLowerModuleLDS)
          PM.addPass(AMDGPULowerModuleLDSPass(*this));
        if (Level != OptimizationLevel::O0) {
          // Do we really need internalization in LTO?
          if (InternalizeSymbols) {
            PM.addPass(InternalizePass(mustPreserveGV));
            PM.addPass(GlobalDCEPass());
          }
          if (EnableAMDGPUAttributor) {
            AMDGPUAttributorOptions Opt;
            if (HasClosedWorldAssumption)
              Opt.IsClosedWorld = true;
            PM.addPass(AMDGPUAttributorPass(*this, Opt));
          }
        }
      });

  // Map the -regalloc filter names ("sgpr"/"vgpr"/"wwm") onto the register
  // filter predicates defined in this file's anonymous namespace.
  PB.registerRegClassFilterParsingCallback(
      [](StringRef FilterName) -> RegAllocFilterFunc {
        if (FilterName == "sgpr")
          return onlyAllocateSGPRs;
        if (FilterName == "vgpr")
          return onlyAllocateVGPRs;
        if (FilterName == "wwm")
          return onlyAllocateWWMRegs;
        return nullptr;
      });
}
895
896int64_tAMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
897return (AddrSpace ==AMDGPUAS::LOCAL_ADDRESS ||
898 AddrSpace ==AMDGPUAS::PRIVATE_ADDRESS ||
899 AddrSpace ==AMDGPUAS::REGION_ADDRESS)
900 ? -1
901 : 0;
902}
903
904boolAMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
905unsigned DestAS) const{
906returnAMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
907AMDGPU::isFlatGlobalAddrSpace(DestAS);
908}
909
910unsignedAMDGPUTargetMachine::getAssumedAddrSpace(constValue *V) const{
911constauto *LD = dyn_cast<LoadInst>(V);
912if (!LD)// TODO: Handle invariant load like constant.
913returnAMDGPUAS::UNKNOWN_ADDRESS_SPACE;
914
915// It must be a generic pointer loaded.
916assert(V->getType()->getPointerAddressSpace() ==AMDGPUAS::FLAT_ADDRESS);
917
918constauto *Ptr = LD->getPointerOperand();
919if (Ptr->getType()->getPointerAddressSpace() !=AMDGPUAS::CONSTANT_ADDRESS)
920returnAMDGPUAS::UNKNOWN_ADDRESS_SPACE;
921// For a generic pointer loaded from the constant memory, it could be assumed
922// as a global pointer since the constant memory is only populated on the
923// host side. As implied by the offload programming model, only global
924// pointers could be referenced on the host side.
925returnAMDGPUAS::GLOBAL_ADDRESS;
926}
927
/// Return {pointer, address space} when \p V is a condition that proves a
/// pointer lives in a specific address space, or {nullptr, -1} otherwise.
/// Recognizes direct amdgcn.is.shared/is.private calls and the negated
/// conjunction of both (which implies a global pointer).
std::pair<const Value *, unsigned>
AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
    // is_shared(p) proves LDS; is_private(p) proves scratch.
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_is_shared:
      return std::pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
    case Intrinsic::amdgcn_is_private:
      return std::pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
    default:
      break;
    }
    return std::pair(nullptr, -1);
  }
  // Check the global pointer predication based on
  // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
  // the order of 'is_shared' and 'is_private' is not significant.
  Value *Ptr;
  if (match(
          const_cast<Value *>(V),
          m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
                  m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
                      m_Deferred(Ptr))))))
    return std::pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);

  return std::pair(nullptr, -1);
}
954
955unsigned
956AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const{
957switch (Kind) {
958casePseudoSourceValue::Stack:
959casePseudoSourceValue::FixedStack:
960returnAMDGPUAS::PRIVATE_ADDRESS;
961casePseudoSourceValue::ConstantPool:
962casePseudoSourceValue::GOT:
963casePseudoSourceValue::JumpTable:
964casePseudoSourceValue::GlobalValueCallEntry:
965casePseudoSourceValue::ExternalSymbolCallEntry:
966returnAMDGPUAS::CONSTANT_ADDRESS;
967 }
968returnAMDGPUAS::FLAT_ADDRESS;
969}
970
971boolAMDGPUTargetMachine::splitModule(
972Module &M,unsigned NumParts,
973function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
974// FIXME(?): Would be better to use an already existing Analysis/PassManager,
975// but all current users of this API don't have one ready and would need to
976// create one anyway. Let's hide the boilerplate for now to keep it simple.
977
978LoopAnalysisManagerLAM;
979FunctionAnalysisManagerFAM;
980CGSCCAnalysisManagerCGAM;
981ModuleAnalysisManagerMAM;
982
983PassBuilderPB(this);
984PB.registerModuleAnalyses(MAM);
985PB.registerFunctionAnalyses(FAM);
986PB.crossRegisterProxies(LAM,FAM,CGAM,MAM);
987
988ModulePassManager MPM;
989 MPM.addPass(AMDGPUSplitModulePass(NumParts, ModuleCallback));
990 MPM.run(M,MAM);
991returntrue;
992}
993
994//===----------------------------------------------------------------------===//
995// GCN Target Machine (SI+)
996//===----------------------------------------------------------------------===//
997
/// GCN target machine constructor: forwards everything to the common AMDGPU
/// target machine. The JIT flag is accepted for interface compatibility but
/// not used here.
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   const TargetOptions &Options,
                                   std::optional<Reloc::Model> RM,
                                   std::optional<CodeModel::Model> CM,
                                   CodeGenOptLevel OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
1005
1006constTargetSubtargetInfo *
1007GCNTargetMachine::getSubtargetImpl(constFunction &F) const{
1008StringRef GPU =getGPUName(F);
1009StringRef FS =getFeatureString(F);
1010
1011SmallString<128> SubtargetKey(GPU);
1012 SubtargetKey.append(FS);
1013
1014auto &I = SubtargetMap[SubtargetKey];
1015if (!I) {
1016// This needs to be done before we create a new subtarget since any
1017// creation will depend on the TM and the code generation flags on the
1018// function that reside in TargetOptions.
1019resetTargetOptions(F);
1020I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
1021 }
1022
1023I->setScalarizeGlobalBehavior(ScalarizeGlobal);
1024
1025returnI.get();
1026}
1027
/// Build a TargetTransformInfo wrapping the GCN-specific cost model for \p F.
TargetTransformInfo
GCNTargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(GCNTTIImpl(this, F));
}
1032
/// Construct the new-pass-manager codegen pipeline for this target and add it
/// to \p MPM; delegates entirely to AMDGPUCodeGenPassBuilder.
Error GCNTargetMachine::buildCodeGenPipeline(
    ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
    CodeGenFileType FileType, const CGPassBuilderOption &Opts,
    PassInstrumentationCallbacks *PIC) {
  AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC);
  return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
}
1040
1041//===----------------------------------------------------------------------===//
1042// AMDGPU Legacy Pass Setup
1043//===----------------------------------------------------------------------===//
1044
/// Use the standard GlobalISel CSE configuration for the current opt level.
std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}
1048
namespace {

/// Legacy pass-manager pass configuration for the GCN (SI+) backend.
class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine &TM, PassManagerBase &PM)
      : AMDGPUPassConfig(TM, PM) {
    // It is necessary to know the register usage of the entire call graph. We
    // allow calls without EnableAMDGPUFunctionCalls if they are marked
    // noinline, so this is always required.
    setRequiresCodeGenSCCOrder(true);
    // Replace the generic post-RA list scheduler with the machine scheduler.
    substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  /// Post-RA scheduler: adds load/store clustering, post-RA IGroupLP
  /// mutations and (from -O1 up, if enabled) VOPD pairing.
  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMI *DAG = new GCNPostScheduleDAGMILive(
        C, std::make_unique<PostGenericScheduler>(C),
        /*RemoveKillFlags=*/true);
    const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    if (ST.shouldClusterStores())
      DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(
        createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
    if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
      DAG->addMutation(createVOPDPairingMutation());
    return DAG;
  }

  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
  // GlobalISel hooks.
  bool addIRTranslator() override;
  void addPreLegalizeMachineIR() override;
  bool addLegalizeMachineIR() override;
  void addPreRegBankSelect() override;
  bool addRegBankSelect() override;
  void addPreGlobalInstructionSelect() override;
  bool addGlobalInstructionSelect() override;
  void addFastRegAlloc() override;
  void addOptimizedRegAlloc() override;

  // The GCN backend allocates SGPRs, WWM registers and VGPRs in separate
  // phases rather than with one generic allocator.
  FunctionPass *createSGPRAllocPass(bool Optimized);
  FunctionPass *createVGPRAllocPass(bool Optimized);
  FunctionPass *createWWMRegAllocPass(bool Optimized);
  FunctionPass *createRegAllocPass(bool Optimized) override;

  bool addRegAssignAndRewriteFast() override;
  bool addRegAssignAndRewriteOptimized() override;

  bool addPreRewrite() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // end anonymous namespace
1114
/// Common AMDGPU pass configuration: disables generic codegen passes that can
/// never apply on this target.
AMDGPUPassConfig::AMDGPUPassConfig(TargetMachine &TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {
  // Exceptions and StackMaps are not supported, so these passes will never do
  // anything.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  // Garbage collection is not supported.
  disablePass(&GCLoweringID);
  disablePass(&ShadowStackGCLoweringID);
}
1125
1126voidAMDGPUPassConfig::addEarlyCSEOrGVNPass() {
1127if (getOptLevel() ==CodeGenOptLevel::Aggressive)
1128addPass(createGVNPass());
1129else
1130addPass(createEarlyCSEPass());
1131}
1132
/// Straight-line scalar optimizations around GEP/addressing canonicalization.
/// The interleaving with CSE/GVN cleanups below is deliberate and
/// order-sensitive.
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
    addPass(createLoopDataPrefetchPass());
  addPass(createSeparateConstOffsetFromGEPPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
1149
/// Build the target-specific IR pass pipeline (legacy PM). Pass ordering here
/// is load-bearing; see the inline comments for the individual dependencies.
void AMDGPUPassConfig::addIRPasses() {
  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();

  Triple::ArchType Arch = TM.getTargetTriple().getArch();
  if (RemoveIncompatibleFunctions && Arch == Triple::amdgcn)
    addPass(createAMDGPURemoveIncompatibleFunctionsPass(&TM));

  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  addPass(createAMDGPUPrintfRuntimeBinding());
  if (LowerCtorDtor)
    addPass(createAMDGPUCtorDtorLoweringLegacyPass());

  if (isPassEnabled(EnableImageIntrinsicOptimizer))
    addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));

  // This can be disabled by passing ::Disable here or on the command line
  // with --expand-variadics-override=disable.
  addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  if (Arch == Triple::r600)
    addPass(createR600OpenCLImageTypeLoweringPass());

  // Replace OpenCL enqueued block function pointers with global variables.
  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass());

  // Lower LDS accesses to global memory pass if address sanitizer is enabled.
  if (EnableSwLowerLDS)
    addPass(createAMDGPUSwLowerLDSLegacyPass(&TM));

  // Runs before PromoteAlloca so the latter can account for function uses
  if (EnableLowerModuleLDS) {
    addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
  }

  if (TM.getOptLevel() > CodeGenOptLevel::None)
    addPass(createInferAddressSpacesPass());

  // Run atomic optimizer before Atomic Expand
  if ((TM.getTargetTriple().getArch() == Triple::amdgcn) &&
      (TM.getOptLevel() >= CodeGenOptLevel::Less) &&
      (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
    addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
  }

  addPass(createAtomicExpandLegacyPass());

  if (TM.getOptLevel() > CodeGenOptLevel::None) {
    addPass(createAMDGPUPromoteAlloca());

    if (isPassEnabled(EnableScalarIRPasses))
      addStraightLineScalarOptimizationPasses();

    if (EnableAMDGPUAliasAnalysis) {
      addPass(createAMDGPUAAWrapperPass());
      // Feed the AMDGPU AA result into the external AA aggregation point.
      addPass(createExternalAAWrapperPass([](Pass &P, Function &,
                                             AAResults &AAR) {
        if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
          AAR.addAAResult(WrapperPass->getResult());
      }));
    }

    if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
      // TODO: May want to move later or split into an early and late one.
      addPass(createAMDGPUCodeGenPreparePass());
    }

    // Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may
    // have expanded.
    if (TM.getOptLevel() > CodeGenOptLevel::Less)
      addPass(createLICMPass());
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  // %0 = add %a, %b
  // %1 = add %b, %a
  //
  // and
  //
  // %0 = shl nsw %a, 2
  // %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (isPassEnabled(EnableScalarIRPasses))
    addEarlyCSEOrGVNPass();
}
1248
/// Target hook run around the generic CodeGenPrepare pass; adds the
/// amdgcn-only lowerings that must happen at this phase.
void AMDGPUPassConfig::addCodeGenPrepare() {
  if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
    // FIXME: This pass adds 2 hacky attributes that can be replaced with an
    // analysis, and should be removed.
    addPass(createAMDGPUAnnotateKernelFeaturesPass());
  }

  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
      EnableLowerKernelArguments)
    addPass(createAMDGPULowerKernelArgumentsPass());

  if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
    // This lowering has been placed after codegenprepare to take advantage of
    // address mode matching (which is why it isn't put with the LDS lowerings).
    // It could be placed anywhere before uniformity annotations (an analysis
    // that it changes by splitting up fat pointers into their components)
    // but has been put before switch lowering and CFG flattening so that those
    // passes can run on the more optimized control flow this pass creates in
    // many cases.
    //
    // FIXME: This should ideally be put after the LoadStoreVectorizer.
    // However, due to some annoying facts about ResourceUsageAnalysis,
    // (especially as exercised in the resource-usage-dead-function test),
    // we need all the function passes codegenprepare all the way through
    // said resource usage analysis to run on the call graph produced
    // before codegenprepare runs (because codegenprepare will knock some
    // nodes out of the graph, which leads to function-level passes not
    // being run on them, which causes crashes in the resource usage analysis).
    addPass(createAMDGPULowerBufferFatPointersPass());
    // In accordance with the above FIXME, manually force all the
    // function-level passes into a CGSCCPassManager.
    addPass(new DummyCGSCCPass());
  }

  TargetPassConfig::addCodeGenPrepare();

  if (isPassEnabled(EnableLoadStoreVectorizer))
    addPass(createLoadStoreVectorizerPass());

  // LowerSwitch pass may introduce unreachable blocks that can
  // cause unexpected behavior for subsequent passes. Placing it
  // here seems better that these blocks would get cleaned up by
  // UnreachableBlockElim inserted next in the pass flow.
  addPass(createLowerSwitchPass());
}
1294
1295boolAMDGPUPassConfig::addPreISel() {
1296if (TM->getOptLevel() >CodeGenOptLevel::None)
1297addPass(createFlattenCFGPass());
1298returnfalse;
1299}
1300
/// Add the SelectionDAG instruction selector.
bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
1305
/// Garbage collection lowering is intentionally omitted on this target.
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}
1310
/// Default pre-RA machine scheduler: generic live-interval scheduling with
/// load clustering (and store clustering where the subtarget profits from it).
llvm::ScheduleDAGInstrs *
AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  if (ST.shouldClusterStores())
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}
1320
1321//===----------------------------------------------------------------------===//
1322// GCN Legacy Pass Setup
1323//===----------------------------------------------------------------------===//
1324
1325ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1326MachineSchedContext *C) const{
1327constGCNSubtarget &ST =C->MF->getSubtarget<GCNSubtarget>();
1328if (ST.enableSIScheduler())
1329returncreateSIMachineScheduler(C);
1330
1331Attribute SchedStrategyAttr =
1332C->MF->getFunction().getFnAttribute("amdgpu-sched-strategy");
1333StringRef SchedStrategy = SchedStrategyAttr.isValid()
1334 ? SchedStrategyAttr.getValueAsString()
1335 :AMDGPUSchedStrategy;
1336
1337if (SchedStrategy =="max-ilp")
1338returncreateGCNMaxILPMachineScheduler(C);
1339
1340if (SchedStrategy =="max-memory-clause")
1341returncreateGCNMaxMemoryClauseMachineScheduler(C);
1342
1343returncreateGCNMaxOccupancyMachineScheduler(C);
1344}
1345
/// GCN pre-ISel pipeline: CFG structurization and the annotations the SI
/// control-flow lowering depends on. Order here is load-bearing.
bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (TM->getOptLevel() > CodeGenOptLevel::None)
    addPass(createSinkingPass());

  if (TM->getOptLevel() > CodeGenOptLevel::None)
    addPass(createAMDGPULateCodeGenPrepareLegacyPass());

  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
  // regions formed by them.
  addPass(&AMDGPUUnifyDivergentExitNodesID);
  // StructurizeCFG requires reducible, single-exit loops.
  addPass(createFixIrreduciblePass());
  addPass(createUnifyLoopExitsPass());
  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions

  addPass(createAMDGPUAnnotateUniformValuesLegacy());
  addPass(createSIAnnotateControlFlowLegacyPass());
  // TODO: Move this right after structurizeCFG to avoid extra divergence
  // analysis. This depends on stopping SIAnnotateControlFlow from making
  // control flow modifications.
  addPass(createAMDGPURewriteUndefForPHILegacyPass());

  addPass(createLCSSAPass());

  if (TM->getOptLevel() > CodeGenOptLevel::Less)
    addPass(&AMDGPUPerfHintAnalysisLegacyID);

  return false;
}
1376
/// Machine-SSA optimizations after the generic set: operand folding, DPP/SDWA
/// combining and load/store merging, with dead-code cleanup between phases.
void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsLegacyID);
  if (EnableDPPCombine)
    addPass(&GCNDPPCombineLegacyID);
  addPass(&SILoadStoreOptimizerLegacyID);
  if (isPassEnabled(EnableSDWAPeephole)) {
    // SDWA creates new folding/CSE opportunities, so rerun those passes.
    addPass(&SIPeepholeSDWALegacyID);
    addPass(&EarlyMachineLICMID);
    addPass(&MachineCSELegacyID);
    addPass(&SIFoldOperandsLegacyID);
  }
  addPass(&DeadMachineInstructionElimID);
  addPass(createSIShrinkInstructionsLegacyPass());
}
1400
/// ILP optimizations: optionally run early if-conversion before the generic
/// ILP passes.
bool GCNPassConfig::addILPOpts() {
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterLegacyID);

  TargetPassConfig::addILPOpts();
  return false;
}
1408
/// After instruction selection, repair SGPR copies and lower i1 copies to the
/// target's representation.
bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(&SIFixSGPRCopiesLegacyID);
  addPass(createSILowerI1CopiesLegacyPass());
  return false;
}
1415
/// GlobalISel stage 1: translate LLVM IR to generic MIR.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}
1420
/// Run the AMDGPU pre-legalizer combiner, then localize constants/globals
/// close to their uses.
void GCNPassConfig::addPreLegalizeMachineIR() {
  bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
  addPass(new Localizer());
}
1426
/// GlobalISel stage 2: legalize generic MIR.
bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
1431
/// Run the post-legalizer combiner and divergence lowering before register
/// bank selection.
void GCNPassConfig::addPreRegBankSelect() {
  bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
  addPass(createAMDGPUGlobalISelDivergenceLoweringPass());
}
1437
1438bool GCNPassConfig::addRegBankSelect() {
1439if (NewRegBankSelect) {
1440 addPass(createAMDGPURegBankSelectPass());
1441 addPass(createAMDGPURegBankLegalizePass());
1442 }else {
1443 addPass(newRegBankSelect());
1444 }
1445returnfalse;
1446}
1447
/// Run the register-bank-aware combiner just before instruction selection.
void GCNPassConfig::addPreGlobalInstructionSelect() {
  bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
  addPass(createAMDGPURegBankCombiner(IsOptNone));
}
1452
/// GlobalISel stage 4: select target instructions.
bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  return false;
}
1457
/// -O0 register allocation: insert the SI control-flow and whole-quad-mode
/// lowerings at their required points in the fast-RA pipeline.
void GCNPassConfig::addFastRegAlloc() {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);

  insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);

  TargetPassConfig::addFastRegAlloc();
}
1471
/// Optimized register allocation: splice the SI-specific passes into the
/// greedy-RA pipeline at their required anchor points.
void GCNPassConfig::addOptimizedRegAlloc() {
  if (EnableDCEInRA)
    insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);

  // FIXME: when an instruction has a Killed operand, and the instruction is
  // inside a bundle, seems only the BUNDLE instruction appears as the Kills of
  // the register in LiveVariables, this would trigger a failure in verifier,
  // we should fix it and enable the verifier.
  if (OptVGPRLiveRange)
    insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);

  if (EnableRewritePartialRegUses)
    insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);

  if (isPassEnabled(EnablePreRAOptimizations))
    insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);

  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
  // instructions that cause scheduling barriers.
  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);

  if (OptExecMaskPreRA)
    insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);

  // This is not an essential optimization and it has a noticeable impact on
  // compilation time, so we only enable it from O2.
  if (TM->getOptLevel() > CodeGenOptLevel::Less)
    insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);

  TargetPassConfig::addOptimizedRegAlloc();
}
1508
/// Hook before virtual-register rewriting; optionally reassigns NSA register
/// sequences. Returns true so VirtRegRewriter is still added by the caller.
bool GCNPassConfig::addPreRewrite() {
  if (EnableRegReassign)
    addPass(&GCNNSAReassignID);
  return true;
}
1514
1515FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1516// Initialize the global default.
1517llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1518 initializeDefaultSGPRRegisterAllocatorOnce);
1519
1520RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1521if (Ctor !=useDefaultRegisterAllocator)
1522return Ctor();
1523
1524if (Optimized)
1525returncreateGreedyRegisterAllocator(onlyAllocateSGPRs);
1526
1527returncreateFastRegisterAllocator(onlyAllocateSGPRs,false);
1528}
1529
1530FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1531// Initialize the global default.
1532llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1533 initializeDefaultVGPRRegisterAllocatorOnce);
1534
1535RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1536if (Ctor !=useDefaultRegisterAllocator)
1537return Ctor();
1538
1539if (Optimized)
1540return createGreedyVGPRRegisterAllocator();
1541
1542return createFastVGPRRegisterAllocator();
1543}
1544
1545FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {
1546// Initialize the global default.
1547llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,
1548 initializeDefaultWWMRegisterAllocatorOnce);
1549
1550RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
1551if (Ctor !=useDefaultRegisterAllocator)
1552return Ctor();
1553
1554if (Optimized)
1555return createGreedyWWMRegisterAllocator();
1556
1557return createFastWWMRegisterAllocator();
1558}
1559
/// The single-allocator hook is unreachable on GCN: allocation is split into
/// the SGPR/WWM/VGPR phases above.
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
  llvm_unreachable("should not be used");
}
1563
// Diagnostic emitted when a plain -regalloc option is used with amdgcn,
// which requires the per-class allocator options instead.
static const char RegAllocOptNotSupportedMessage[] =
    "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
    "and -vgpr-regalloc";
1567
/// -O0 phased register assignment: SGPRs, then WWM registers, then VGPRs,
/// with the SGPR-spill and WWM lowerings interleaved at the required points.
bool GCNPassConfig::addRegAssignAndRewriteFast() {
  if (!usingDefaultRegAlloc())
    report_fatal_error(RegAllocOptNotSupportedMessage);

  addPass(&GCNPreRALongBranchRegID);

  addPass(createSGPRAllocPass(false));

  // Equivalent of PEI for SGPRs.
  addPass(&SILowerSGPRSpillsLegacyID);

  // To Allocate wwm registers used in whole quad mode operations (for shaders).
  addPass(&SIPreAllocateWWMRegsLegacyID);

  // For allocating other wwm register operands.
  addPass(createWWMRegAllocPass(false));

  addPass(&SILowerWWMCopiesLegacyID);
  addPass(&AMDGPUReserveWWMRegsID);

  // For allocating per-thread VGPRs.
  addPass(createVGPRAllocPass(false));

  return true;
}
1593
/// Optimized phased register assignment: greedy SGPR, WWM and VGPR allocation
/// with explicit rewriting between phases so later phases see physical
/// registers.
bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
  if (!usingDefaultRegAlloc())
    report_fatal_error(RegAllocOptNotSupportedMessage);

  addPass(&GCNPreRALongBranchRegID);

  addPass(createSGPRAllocPass(true));

  // Commit allocated register changes. This is mostly necessary because too
  // many things rely on the use lists of the physical registers, such as the
  // verifier. This is only necessary with allocators which use LiveIntervals,
  // since FastRegAlloc does the replacements itself.
  addPass(createVirtRegRewriter(false));

  // At this point, the sgpr-regalloc has been done and it is good to have the
  // stack slot coloring to try to optimize the SGPR spill stack indices before
  // attempting the custom SGPR spill lowering.
  addPass(&StackSlotColoringID);

  // Equivalent of PEI for SGPRs.
  addPass(&SILowerSGPRSpillsLegacyID);

  // To Allocate wwm registers used in whole quad mode operations (for shaders).
  addPass(&SIPreAllocateWWMRegsLegacyID);

  // For allocating other whole wave mode registers.
  addPass(createWWMRegAllocPass(true));
  addPass(&SILowerWWMCopiesLegacyID);
  addPass(createVirtRegRewriter(false));
  addPass(&AMDGPUReserveWWMRegsID);

  // For allocating per-thread VGPRs.
  addPass(createVGPRAllocPass(true));

  addPreRewrite();
  addPass(&VirtRegRewriterID);

  addPass(&AMDGPUMarkLastScratchLoadID);

  return true;
}
1635
/// Post-RA fixups: repair VGPR copies, and optimize exec masking when
/// optimizing.
void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIFixVGPRCopiesID);
  if (getOptLevel() > CodeGenOptLevel::None)
    addPass(&SIOptimizeExecMaskingLegacyID);
  TargetPassConfig::addPostRegAlloc();
}
1642
/// Before the second scheduling pass: shrink instructions (when optimizing)
/// and bundle memory operations.
void GCNPassConfig::addPreSched2() {
  if (TM->getOptLevel() > CodeGenOptLevel::None)
    addPass(createSIShrinkInstructionsLegacyPass());
  addPass(&SIPostRABundlerID);
}
1648
/// Final pre-emission passes: memory model legalization, waitcnt insertion,
/// hazard handling and branch relaxation.
void GCNPassConfig::addPreEmitPass() {
  if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
    addPass(&GCNCreateVOPDID);
  addPass(createSIMemoryLegalizerPass());
  addPass(createSIInsertWaitcntsPass());

  addPass(createSIModeRegisterPass());

  if (getOptLevel() > CodeGenOptLevel::None)
    addPass(&SIInsertHardClausesID);

  addPass(&SILateBranchLoweringPassID);
  if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
    addPass(createAMDGPUSetWavePriorityPass());
  if (getOptLevel() > CodeGenOptLevel::None)
    addPass(&SIPreEmitPeepholeID);
  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able handle all hazards correctly. This is because if there
  // are multiple scheduling regions in a basic block, the regions are scheduled
  // bottom up, so when we begin to schedule a region we don't know what
  // instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
    addPass(&AMDGPUInsertDelayAluID);

  addPass(&BranchRelaxationPassID);
  addPass(createAMDGPUPreloadKernArgPrologLegacyPass());
}
1681
/// Legacy pass-manager entry point: create the GCN pass configuration.
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(*this, PM);
}
1685
/// Register the SI function info as a MachineRegisterInfo delegate so it is
/// notified of virtual-register events for \p MF.
void GCNTargetMachine::registerMachineRegisterInfoCallback(
    MachineFunction &MF) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MF.getRegInfo().addDelegate(MFI);
}
1691
/// Allocate the SI-specific per-function codegen state for \p F in
/// \p Allocator.
MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo(
    BumpPtrAllocator &Allocator, const Function &F,
    const TargetSubtargetInfo *STI) const {
  return SIMachineFunctionInfo::create<SIMachineFunctionInfo>(
      Allocator, F, static_cast<const GCNSubtarget *>(STI));
}
1698
/// Create a default-constructed YAML function-info object for MIR
/// serialization.
yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
  return new yaml::SIMachineFunctionInfo();
}
1702
/// Serialize \p MF's SI function info into its YAML representation for MIR
/// output.
yaml::MachineFunctionInfo *
GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  return new yaml::SIMachineFunctionInfo(
      *MFI, *MF.getSubtarget<GCNSubtarget>().getRegisterInfo(), MF);
}
1709
bool GCNTargetMachine::parseMachineFunctionInfo(
    const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
    SMDiagnostic &Error, SMRange &SourceRange) const {
  // Deserialize the SI-specific machine function info from its parsed YAML
  // form (the MIR round-trip path). Returns true and fills Error/SourceRange
  // on failure, false on success.
  const yaml::SIMachineFunctionInfo &YamlMFI =
      static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
  MachineFunction &MF = PFS.MF;
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
    return true;

  if (MFI->Occupancy == 0) {
    // Fixup the subtarget dependent default value.
    MFI->Occupancy = ST.getOccupancyWithWorkGroupSizes(MF).second;
  }

  // Parse a named register reference; on failure record the offending source
  // range and return true. RegVal is only written on success.
  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
    Register TempReg;
    if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
      SourceRange = RegName.SourceRange;
      return true;
    }
    RegVal = TempReg;

    return false;
  };

  // Like parseRegister, but an empty string means "field absent" and is not
  // an error (RegVal is left untouched).
  auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
                                   Register &RegVal) {
    return !RegName.Value.empty() && parseRegister(RegName, RegVal);
  };

  if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
    return true;

  if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))
    return true;

  if (parseOptionalRegister(YamlMFI.LongBranchReservedReg,
                            MFI->LongBranchReservedReg))
    return true;

  // Emit an "incorrect register class" diagnostic anchored at the register
  // string literal. Always returns true so callers can use
  // 'return diagnoseRegisterClass(...)'.
  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
    // Create a diagnostic for the register string literal.
    const MemoryBuffer &Buffer =
        *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
    Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
                         RegName.Value.size(), SourceMgr::DK_Error,
                         "incorrect register class for field", RegName.Value,
                         {}, {});
    SourceRange = RegName.SourceRange;
    return true;
  };

  // These three fields are mandatory.
  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
      parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
      parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
    return true;

  // Validate register classes; the placeholder values (PRIVATE_RSRC_REG,
  // FP_REG, SP_REG) bypass the class check.
  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
      !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
    return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
  }

  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
      !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
    return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
  }

  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
      !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
    return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
  }

  for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
    Register ParsedReg;
    if (parseRegister(YamlReg, ParsedReg))
      return true;

    MFI->reserveWWMRegister(ParsedReg);
  }

  // Propagate the per-vreg flags recorded by the MIR parser onto the function
  // info, for both named and numbered virtual registers.
  for (const auto &[_, Info] : PFS.VRegInfosNamed) {
    MFI->setFlag(Info->VReg, Info->Flags);
  }
  for (const auto &[_, Info] : PFS.VRegInfos) {
    MFI->setFlag(Info->VReg, Info->Flags);
  }

  for (const auto &YamlRegStr : YamlMFI.SpillPhysVGPRS) {
    Register ParsedReg;
    if (parseRegister(YamlRegStr, ParsedReg))
      return true;
    MFI->SpillPhysVGPRs.push_back(ParsedReg);
  }

  // Parse one optional ABI argument descriptor: either a register (validated
  // against RC) or a stack offset, plus an optional mask; on success also
  // accumulate the SGPR counts the argument consumes.
  auto parseAndCheckArgument = [&](const std::optional<yaml::SIArgument> &A,
                                   const TargetRegisterClass &RC,
                                   ArgDescriptor &Arg, unsigned UserSGPRs,
                                   unsigned SystemSGPRs) {
    // Skip parsing if it's not present.
    if (!A)
      return false;

    if (A->IsRegister) {
      Register Reg;
      if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
        SourceRange = A->RegisterName.SourceRange;
        return true;
      }
      if (!RC.contains(Reg))
        return diagnoseRegisterClass(A->RegisterName);
      Arg = ArgDescriptor::createRegister(Reg);
    } else
      Arg = ArgDescriptor::createStack(A->StackOffset);
    // Check and apply the optional mask.
    if (A->Mask)
      Arg = ArgDescriptor::createArg(Arg, *A->Mask);

    MFI->NumUserSGPRs += UserSGPRs;
    MFI->NumSystemSGPRs += SystemSGPRs;
    return false;
  };

  // Parse every known ABI argument; any failure aborts the whole parse.
  if (YamlMFI.ArgInfo &&
      (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
                             AMDGPU::SGPR_128RegClass,
                             MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
                             2, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
                             MFI->ArgInfo.QueuePtr, 2, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
                             AMDGPU::SReg_64RegClass,
                             MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
                             2, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
                             AMDGPU::SReg_64RegClass,
                             MFI->ArgInfo.FlatScratchInit, 2, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
                             AMDGPU::SGPR_32RegClass,
                             MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,
                             AMDGPU::SGPR_32RegClass,
                             MFI->ArgInfo.LDSKernelId, 0, 1) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
                             0, 1) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
                             0, 1) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
                             0, 1) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
                             AMDGPU::SGPR_32RegClass,
                             MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
                             AMDGPU::SGPR_32RegClass,
                             MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
                             AMDGPU::SReg_64RegClass,
                             MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
                             AMDGPU::SReg_64RegClass,
                             MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
                             AMDGPU::VGPR_32RegClass,
                             MFI->ArgInfo.WorkItemIDX, 0, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
                             AMDGPU::VGPR_32RegClass,
                             MFI->ArgInfo.WorkItemIDY, 0, 0) ||
       parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
                             AMDGPU::VGPR_32RegClass,
                             MFI->ArgInfo.WorkItemIDZ, 0, 0)))
    return true;

  // Mode bits are only applied on subtargets that report support for them.
  if (ST.hasIEEEMode())
    MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
  if (ST.hasDX10ClampMode())
    MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;

  // FIXME: Move proper support for denormal-fp-math into base MachineFunction
  MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals
                                      ? DenormalMode::IEEE
                                      : DenormalMode::PreserveSign;
  MFI->Mode.FP32Denormals.Output = YamlMFI.Mode.FP32OutputDenormals
                                       ? DenormalMode::IEEE
                                       : DenormalMode::PreserveSign;

  MFI->Mode.FP64FP16Denormals.Input = YamlMFI.Mode.FP64FP16InputDenormals
                                          ? DenormalMode::IEEE
                                          : DenormalMode::PreserveSign;
  MFI->Mode.FP64FP16Denormals.Output = YamlMFI.Mode.FP64FP16OutputDenormals
                                           ? DenormalMode::IEEE
                                           : DenormalMode::PreserveSign;

  if (YamlMFI.HasInitWholeWave)
    MFI->setInitWholeWave();

  return false;
}
1916
1917//===----------------------------------------------------------------------===//
1918// AMDGPU CodeGen Pass Builder interface.
1919//===----------------------------------------------------------------------===//
1920
AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
    GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
    PassInstrumentationCallbacks *PIC)
    : CodeGenPassBuilder(TM, Opts, PIC) {
  // Require functions to be processed in call-graph SCC order (presumably so
  // information about callees is available when a caller is compiled — see
  // the ResourceUsageAnalysis note in addCodeGenPrepare).
  Opt.RequiresCodeGenSCCOrder = true;
  // Exceptions and StackMaps are not supported, so these passes will never do
  // anything.
  // Garbage collection is not supported.
  disablePass<StackMapLivenessPass, FuncletLayoutPass,
              ShadowStackGCLoweringPass>();
}
1932
void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
  // New-PM IR pass pipeline for AMDGPU. The ordering below is deliberate;
  // several passes depend on what earlier ones have lowered or cleaned up.
  if (RemoveIncompatibleFunctions && TM.getTargetTriple().isAMDGCN())
    addPass(AMDGPURemoveIncompatibleFunctionsPass(TM));

  addPass(AMDGPUPrintfRuntimeBindingPass());
  if (LowerCtorDtor)
    addPass(AMDGPUCtorDtorLoweringPass());

  if (isPassEnabled(EnableImageIntrinsicOptimizer))
    addPass(AMDGPUImageIntrinsicOptimizerPass(TM));

  // This can be disabled by passing ::Disable here or on the command line
  // with --expand-variadics-override=disable.
  addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));

  // Force-inline everything marked always_inline (AMDGPUAlwaysInline marks
  // candidates; AlwaysInliner performs the inlining).
  addPass(AMDGPUAlwaysInlinePass());
  addPass(AlwaysInlinerPass());

  addPass(AMDGPUOpenCLEnqueuedBlockLoweringPass());

  if (EnableSwLowerLDS)
    addPass(AMDGPUSwLowerLDSPass(TM));

  // Runs before PromoteAlloca so the latter can account for function uses
  if (EnableLowerModuleLDS)
    addPass(AMDGPULowerModuleLDSPass(TM));

  if (TM.getOptLevel() > CodeGenOptLevel::None)
    addPass(InferAddressSpacesPass());

  // Run atomic optimizer before Atomic Expand
  if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
      (AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
    addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));

  addPass(AtomicExpandPass(&TM));

  // Optimization-only IR passes.
  if (TM.getOptLevel() > CodeGenOptLevel::None) {
    addPass(AMDGPUPromoteAllocaPass(TM));
    if (isPassEnabled(EnableScalarIRPasses))
      addStraightLineScalarOptimizationPasses(addPass);

    // TODO: Handle EnableAMDGPUAliasAnalysis

    // TODO: May want to move later or split into an early and late one.
    addPass(AMDGPUCodeGenPreparePass(TM));

    // TODO: LICM
  }

  Base::addIRPasses(addPass);

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  // %0 = add %a, %b
  // %1 = add %b, %a
  //
  // and
  //
  // %0 = shl nsw %a, 2
  // %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (isPassEnabled(EnableScalarIRPasses))
    addEarlyCSEOrGVNPass(addPass);
}
2000
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
  // AMDGPU-specific additions around the generic CodeGenPrepare step.

  // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
  // deleted soon.

  if (EnableLowerKernelArguments)
    addPass(AMDGPULowerKernelArgumentsPass(TM));

  // This lowering has been placed after codegenprepare to take advantage of
  // address mode matching (which is why it isn't put with the LDS lowerings).
  // It could be placed anywhere before uniformity annotations (an analysis
  // that it changes by splitting up fat pointers into their components)
  // but has been put before switch lowering and CFG flattening so that those
  // passes can run on the more optimized control flow this pass creates in
  // many cases.
  //
  // FIXME: This should ideally be put after the LoadStoreVectorizer.
  // However, due to some annoying facts about ResourceUsageAnalysis,
  // (especially as exercised in the resource-usage-dead-function test),
  // we need all the function passes codegenprepare all the way through
  // said resource usage analysis to run on the call graph produced
  // before codegenprepare runs (because codegenprepare will knock some
  // nodes out of the graph, which leads to function-level passes not
  // being run on them, which causes crashes in the resource usage analysis).
  addPass(AMDGPULowerBufferFatPointersPass(TM));

  Base::addCodeGenPrepare(addPass);

  if (isPassEnabled(EnableLoadStoreVectorizer))
    addPass(LoadStoreVectorizerPass());

  // LowerSwitch pass may introduce unreachable blocks that can cause unexpected
  // behavior for subsequent passes. Placing it here seems better that these
  // blocks would get cleaned up by UnreachableBlockElim inserted next in the
  // pass flow.
  addPass(LowerSwitchPass());
}
2037
2038voidAMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const{
2039
2040if (TM.getOptLevel() >CodeGenOptLevel::None)
2041 addPass(FlattenCFGPass());
2042
2043if (TM.getOptLevel() >CodeGenOptLevel::None)
2044 addPass(SinkingPass());
2045
2046 addPass(AMDGPULateCodeGenPreparePass(TM));
2047
2048// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
2049// regions formed by them.
2050
2051 addPass(AMDGPUUnifyDivergentExitNodesPass());
2052 addPass(FixIrreduciblePass());
2053 addPass(UnifyLoopExitsPass());
2054 addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false));
2055
2056 addPass(AMDGPUAnnotateUniformValuesPass());
2057
2058 addPass(SIAnnotateControlFlowPass(TM));
2059
2060// TODO: Move this right after structurizeCFG to avoid extra divergence
2061// analysis. This depends on stopping SIAnnotateControlFlow from making
2062// control flow modifications.
2063 addPass(AMDGPURewriteUndefForPHIPass());
2064
2065 addPass(LCSSAPass());
2066
2067if (TM.getOptLevel() >CodeGenOptLevel::Less)
2068 addPass(AMDGPUPerfHintAnalysisPass(TM));
2069
2070// FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why
2071// isn't this in addInstSelector?
2072 addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>());
2073}
2074
void AMDGPUCodeGenPassBuilder::addILPOpts(AddMachinePass &addPass) const {
  // Early if-conversion defaults to off for AMDGPU (cl::opt
  // "amdgpu-early-ifcvt", init(false)) but can be enabled for experimentation.
  if (EnableEarlyIfConversion)
    addPass(EarlyIfConverterPass());

  Base::addILPOpts(addPass);
}
2081
void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass,
                                             CreateMCStreamer) const {
  // Intentionally a no-op for now: the new-pass-manager AsmPrinter hookup for
  // AMDGPU has not been implemented yet.
  // TODO: Add AsmPrinter.
}
2086
Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
  // SelectionDAG-based instruction selection, followed by the SGPR-copy and
  // i1-copy fixups that run immediately after ISel.
  addPass(AMDGPUISelDAGToDAGPass(TM));
  addPass(SIFixSGPRCopiesPass());
  addPass(SILowerI1CopiesPass());
  return Error::success();
}
2093
void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
    AddMachinePass &addPass) const {
  // Generic machine-SSA optimizations first, then the AMDGPU-specific
  // MIR cleanups that operate on the result.
  Base::addMachineSSAOptimization(addPass);

  addPass(SIFoldOperandsPass());
  if (EnableDPPCombine) {
    addPass(GCNDPPCombinePass());
  }
  addPass(SILoadStoreOptimizerPass());
  if (isPassEnabled(EnableSDWAPeephole)) {
    addPass(SIPeepholeSDWAPass());
    // LICM/CSE clean up after the SDWA peephole, then operand folding is
    // re-run on the simplified code.
    addPass(EarlyMachineLICMPass());
    addPass(MachineCSEPass());
    addPass(SIFoldOperandsPass());
  }
  addPass(DeadMachineInstructionElimPass());
  addPass(SIShrinkInstructionsPass());
}
2112
void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
  // Post-RA fixups: VGPR-copy repair always runs; exec-mask optimization is
  // skipped at -O0.
  addPass(SIFixVGPRCopiesPass());
  if (TM.getOptLevel() > CodeGenOptLevel::None)
    addPass(SIOptimizeExecMaskingPass());
  Base::addPostRegAlloc(addPass);
}
2119
2120boolAMDGPUCodeGenPassBuilder::isPassEnabled(constcl::opt<bool> &Opt,
2121CodeGenOptLevel Level) const{
2122if (Opt.getNumOccurrences())
2123returnOpt;
2124if (TM.getOptLevel() < Level)
2125returnfalse;
2126returnOpt;
2127}
2128
2129voidAMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const{
2130if (TM.getOptLevel() ==CodeGenOptLevel::Aggressive)
2131 addPass(GVNPass());
2132else
2133 addPass(EarlyCSEPass());
2134}
2135
void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
    AddIRPass &addPass) const {
  // Scalar IR optimizations targeting straight-line code; invoked from
  // addIRPasses when scalar IR passes are enabled.
  if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
    addPass(LoopDataPrefetchPass());

  addPass(SeparateConstOffsetFromGEPPass());

  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(StraightLineStrengthReducePass());

  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass(addPass);

  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(NaryReassociatePass());

  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(EarlyCSEPass());
}
MRI
unsigned const MachineRegisterInfo * MRI
Definition:AArch64AdvSIMDScalarPass.cpp:105
Phase
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
Definition:AArch64FalkorHWPFFix.cpp:228
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(true))
AMDGPUAliasAnalysis.h
This is the AMGPU address space based alias analysis pass.
AMDGPUBaseInfo.h
AMDGPUCtorDtorLowering.h
AMDGPUExportClustering.h
AMDGPUIGroupLP.h
AMDGPUISelDAGToDAG.h
Defines an instruction selector for the AMDGPU target.
AMDGPUMacroFusion.h
AMDGPUOpenCLEnqueuedBlockLowering.h
AMDGPUPerfHintAnalysis.h
Analyzes if a function potentially memory bound and if a kernel kernel may benefit from limiting numb...
AMDGPURemoveIncompatibleFunctions.h
AMDGPUSplitModule.h
AMDGPUTargetInfo.h
EnableDCEInRA
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
EnableLowerModuleLDS
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
GCNMaxMemoryClauseSchedRegistry
static MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)
SISchedRegistry
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:612
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
EnableSwLowerLDS
static cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
createGCNMaxILPMachineScheduler
static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:576
EnableSDWAPeephole
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
EnableImageIntrinsicOptimizer
static cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)
HasClosedWorldAssumption
static cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)
createGCNMaxMemoryClauseMachineScheduler
static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:584
EnableSIModeRegisterPass
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
AMDGPUSchedStrategy
static cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
EnableSetWavePriority
static cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
LowerCtorDtor
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
EnablePromoteKernelArguments
static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
EnableRewritePartialRegUses
static cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
GCNMaxILPSchedRegistry
static MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
InternalizeSymbols
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
EnableAMDGPUAttributor
static cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)
getGPUOrDefault
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Definition:AMDGPUTargetMachine.cpp:675
getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(std::optional< Reloc::Model > RM)
Definition:AMDGPUTargetMachine.cpp:686
parseAMDGPUAttributorPassOptions
Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions(StringRef Params)
Definition:AMDGPUTargetMachine.cpp:760
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
parseAMDGPUAtomicOptimizerStrategy
static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy(StringRef Params)
Definition:AMDGPUTargetMachine.cpp:745
createMinRegScheduler
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:606
EnableHipStdPar
static cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)
EnableInsertDelayAlu
static cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:596
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
mustPreserveGV
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
Definition:AMDGPUTargetMachine.cpp:730
EnableLoopPrefetch
static cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))
LLVMInitializeAMDGPUTarget
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
Definition:AMDGPUTargetMachine.cpp:469
NewRegBankSelect
static cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)
RemoveIncompatibleFunctions
static cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
OptVGPRLiveRange
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
createSIMachineScheduler
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:557
EnablePreRAOptimizations
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
AMDGPUAtomicOptimizerStrategy
static cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))
EnableVOPD
static cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
createGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition:AMDGPUTargetMachine.cpp:562
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
RegAllocOptNotSupportedMessage
static const char RegAllocOptNotSupportedMessage[]
Definition:AMDGPUTargetMachine.cpp:1564
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMDGPUTargetObjectFile.h
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
AMDGPUTargetTransformInfo.h
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine.
AMDGPUUnifyDivergentExitNodes.h
AMDGPU.h
AlwaysInliner.h
Provides passes to inlining "always_inline" functions.
AtomicExpand.h
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
CGSCCPassManager.h
This header provides classes for managing passes over SCCs of the call graph.
Info
Analysis containing CSE Info
Definition:CSEInfo.cpp:27
CSEInfo.h
Provides analysis for continuously CSEing during GISel passes.
CallGraphSCCPass.h
Passes.h
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition:CommandLine.h:686
LLVM_READNONE
#define LLVM_READNONE
Definition:Compiler.h:299
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition:Compiler.h:128
DeadMachineInstructionElim.h
EarlyCSE.h
This file provides the interface for a simple, fast CSE pass.
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
ExpandVariadics.h
FixIrreducible.h
FlattenCFG.h
FormatVariadic.h
GCNDPPCombine.h
GCNIterativeScheduler.h
This file defines the class GCNIterativeScheduler, which uses an iterative approach to find a best sc...
GCNSchedStrategy.h
GCNVOPDUtils.h
GVN.h
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
GlobalDCE.h
_
#define _
Definition:HexagonMCCodeEmitter.cpp:46
HipStdPar.h
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
IPO.h
IRTranslator.h
This file declares the IRTranslator pass.
PassManager.h
This header defines various interfaces for pass management in LLVM.
InferAddressSpaces.h
InitializePasses.h
InstructionSelect.h
Internalize.h
LCSSA.h
RegName
#define RegName(no)
Options
static LVOptions Options
Definition:LVOptions.cpp:25
computeDataLayout
static std::string computeDataLayout()
Definition:LanaiTargetMachine.cpp:41
Legalizer.h
LoadStoreVectorizer.h
Localizer.h
LoopDataPrefetch.h
This file provides the interface for LLVM's Loop Data Prefetching Pass.
LowerSwitch.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
MIParser.h
MachineCSE.h
MachineLICM.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition:MachineSink.cpp:2029
II
uint64_t IntrinsicInst * II
Definition:NVVMIntrRange.cpp:51
NaryReassociate.h
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
P
#define P(N)
CGAM
CGSCCAnalysisManager CGAM
Definition:PassBuilderBindings.cpp:62
LAM
LoopAnalysisManager LAM
Definition:PassBuilderBindings.cpp:60
FAM
FunctionAnalysisManager FAM
Definition:PassBuilderBindings.cpp:61
MAM
ModuleAnalysisManager MAM
Definition:PassBuilderBindings.cpp:63
PIC
PassInstrumentationCallbacks PIC
Definition:PassBuilderBindings.cpp:57
PB
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
isLTOPreLink
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
Definition:PassBuilderPipelines.cpp:402
PatternMatch.h
R600TargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.
R600.h
Allocator
Basic Register Allocator
Definition:RegAllocBasic.cpp:146
RegAllocRegistry.h
RegBankSelect.h
This file describes the interface of the MachineFunctionPass responsible for assigning the generic vi...
SIFixSGPRCopies.h
SIFixVGPRCopies.h
SIFoldOperands.h
SILoadStoreOptimizer.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SILowerControlFlow.h
SILowerSGPRSpills.h
SILowerWWMCopies.h
SIMachineFunctionInfo.h
SIMachineScheduler.h
SI Machine Scheduler interface.
SIOptimizeExecMasking.h
SIOptimizeVGPRLiveRange.h
SIPeepholeSDWA.h
SIPreAllocateWWMRegs.h
SIShrinkInstructions.h
Scalar.h
SeparateConstOffsetFromGEP.h
SimplifyLibCalls.h
Sink.h
StraightLineStrengthReduce.h
StructurizeCFG.h
Ptr
@ Ptr
Definition:TargetLibraryInfo.cpp:77
useDefaultRegisterAllocator
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
Definition:TargetPassConfig.cpp:1081
TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.
TargetPassRegistry.inc
TargetRegistry.h
Utils.h
UniformityAnalysis.h
LLVM IR instance of the generic uniformity analysis.
UnifyLoopExits.h
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF()
Definition:VETargetMachine.cpp:81
FunctionPassCtor
T
llvm::AAManager
A manager for alias analyses.
Definition:AliasAnalysis.h:933
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition:AliasAnalysis.h:938
llvm::AAResults
Definition:AliasAnalysis.h:314
llvm::AAResults::addAAResult
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
Definition:AliasAnalysis.h:323
llvm::AMDGPUAAWrapperPass
Legacy wrapper pass to provide the AMDGPUAAResult object.
Definition:AMDGPUAliasAnalysis.h:61
llvm::AMDGPUAA
Analysis pass providing a never-invalidated alias analysis result.
Definition:AMDGPUAliasAnalysis.h:47
llvm::AMDGPUAnnotateUniformValuesPass
Definition:AMDGPU.h:343
llvm::AMDGPUAttributorPass
Definition:AMDGPU.h:330
llvm::AMDGPUCodeGenPassBuilder
Definition:AMDGPUTargetMachine.h:164
llvm::AMDGPUCodeGenPassBuilder::addInstSelector
Error addInstSelector(AddMachinePass &) const
Definition:AMDGPUTargetMachine.cpp:2087
llvm::AMDGPUCodeGenPassBuilder::addMachineSSAOptimization
void addMachineSSAOptimization(AddMachinePass &) const
Definition:AMDGPUTargetMachine.cpp:2094
llvm::AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass
void addEarlyCSEOrGVNPass(AddIRPass &) const
Definition:AMDGPUTargetMachine.cpp:2129
llvm::AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses
void addStraightLineScalarOptimizationPasses(AddIRPass &) const
Definition:AMDGPUTargetMachine.cpp:2136
llvm::AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC)
Definition:AMDGPUTargetMachine.cpp:1921
llvm::AMDGPUCodeGenPassBuilder::addIRPasses
void addIRPasses(AddIRPass &) const
Definition:AMDGPUTargetMachine.cpp:1933
llvm::AMDGPUCodeGenPassBuilder::addPreISel
void addPreISel(AddIRPass &addPass) const
Definition:AMDGPUTargetMachine.cpp:2038
llvm::AMDGPUCodeGenPassBuilder::addAsmPrinter
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const
Definition:AMDGPUTargetMachine.cpp:2082
llvm::AMDGPUCodeGenPassBuilder::addCodeGenPrepare
void addCodeGenPrepare(AddIRPass &) const
Definition:AMDGPUTargetMachine.cpp:2001
llvm::AMDGPUCodeGenPassBuilder::addILPOpts
void addILPOpts(AddMachinePass &) const
Definition:AMDGPUTargetMachine.cpp:2075
llvm::AMDGPUCodeGenPassBuilder::addPostRegAlloc
void addPostRegAlloc(AddMachinePass &) const
Definition:AMDGPUTargetMachine.cpp:2113
llvm::AMDGPUCodeGenPassBuilder::isPassEnabled
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOptLevel Level=CodeGenOptLevel::Default) const
Check if a pass is enabled given Opt option.
Definition:AMDGPUTargetMachine.cpp:2120
llvm::AMDGPUCodeGenPreparePass
Definition:AMDGPU.h:297
llvm::AMDGPUCtorDtorLoweringPass
Lower llvm.global_ctors and llvm.global_dtors to special kernels.
Definition:AMDGPUCtorDtorLowering.h:19
llvm::AMDGPUISelDAGToDAGPass
Definition:AMDGPUISelDAGToDAG.h:284
llvm::AMDGPULateCodeGenPreparePass
Definition:AMDGPU.h:307
llvm::AMDGPULowerKernelArgumentsPass
Definition:AMDGPU.h:317
llvm::AMDGPUMachineFunction::setInitWholeWave
void setInitWholeWave()
Definition:AMDGPUMachineFunction.h:115
llvm::AMDGPUOpenCLEnqueuedBlockLoweringPass
Definition:AMDGPUOpenCLEnqueuedBlockLowering.h:16
llvm::AMDGPUPassConfig
Definition:AMDGPUTargetMachine.h:124
llvm::AMDGPUPassConfig::getAMDGPUTargetMachine
AMDGPUTargetMachine & getAMDGPUTargetMachine() const
Definition:AMDGPUTargetMachine.h:128
llvm::AMDGPUPassConfig::getCSEConfig
std::unique_ptr< CSEConfigBase > getCSEConfig() const override
Returns the CSEConfig object to use for the current optimization level.
Definition:AMDGPUTargetMachine.cpp:1045
llvm::AMDGPUPassConfig::createMachineScheduler
ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
Definition:AMDGPUTargetMachine.cpp:1312
llvm::AMDGPUPassConfig::isPassEnabled
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOptLevel Level=CodeGenOptLevel::Default) const
Check if a pass is enabled given Opt option.
Definition:AMDGPUTargetMachine.h:149
llvm::AMDGPUPassConfig::addPreISel
bool addPreISel() override
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
Definition:AMDGPUTargetMachine.cpp:1295
llvm::AMDGPUPassConfig::addInstSelector
bool addInstSelector() override
addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...
Definition:AMDGPUTargetMachine.cpp:1301
llvm::AMDGPUPassConfig::addGCPasses
bool addGCPasses() override
addGCPasses - Add late codegen passes that analyze code for garbage collection.
Definition:AMDGPUTargetMachine.cpp:1306
llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses
void addStraightLineScalarOptimizationPasses()
Definition:AMDGPUTargetMachine.cpp:1133
llvm::AMDGPUPassConfig::AMDGPUPassConfig
AMDGPUPassConfig(TargetMachine &TM, PassManagerBase &PM)
Definition:AMDGPUTargetMachine.cpp:1115
llvm::AMDGPUPassConfig::addIRPasses
void addIRPasses() override
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition:AMDGPUTargetMachine.cpp:1150
llvm::AMDGPUPassConfig::addEarlyCSEOrGVNPass
void addEarlyCSEOrGVNPass()
Definition:AMDGPUTargetMachine.cpp:1126
llvm::AMDGPUPassConfig::addCodeGenPrepare
void addCodeGenPrepare() override
Add pass to prepare the LLVM IR for code generation.
Definition:AMDGPUTargetMachine.cpp:1249
llvm::AMDGPURemoveIncompatibleFunctionsPass
Definition:AMDGPURemoveIncompatibleFunctions.h:17
llvm::AMDGPURewriteUndefForPHIPass
Definition:AMDGPU.h:394
llvm::AMDGPUSplitModulePass
Splits the module M into N linkable partitions.
Definition:AMDGPUSplitModule.h:22
llvm::AMDGPUTargetMachine
Definition:AMDGPUTargetMachine.h:31
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition:AMDGPUTargetMachine.cpp:896
llvm::AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override
getAddressSpaceForPseudoSourceKind - Given the kind of memory (e.g.
Definition:AMDGPUTargetMachine.cpp:956
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition:AMDGPUTargetMachine.cpp:740
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition:AMDGPUTargetMachine.cpp:929
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition:AMDGPUTargetMachine.cpp:722
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition:AMDGPUTargetMachine.h:39
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOptLevel OL)
Definition:AMDGPUTargetMachine.cpp:692
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition:AMDGPUTargetMachine.cpp:904
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline.
Definition:AMDGPUTargetMachine.cpp:777
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition:AMDGPUTargetMachine.h:40
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition:AMDGPUTargetMachine.cpp:717
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition:AMDGPUTargetMachine.cpp:910
llvm::AMDGPUTargetMachine::splitModule
bool splitModule(Module &M, unsigned NumParts, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback) override
Entry point for module splitting.
Definition:AMDGPUTargetMachine.cpp:971
llvm::AMDGPUUnifyDivergentExitNodesPass
Definition:AMDGPUUnifyDivergentExitNodes.h:29
llvm::AlwaysInlinerPass
Inlines functions marked as "always_inline".
Definition:AlwaysInliner.h:32
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition:PassManager.h:253
llvm::AtomicExpandPass
Definition:AtomicExpand.h:19
llvm::Attribute
Definition:Attributes.h:67
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition:Attributes.cpp:392
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition:Attributes.h:208
llvm::BumpPtrAllocatorImpl
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition:Allocator.h:66
llvm::CodeGenPassBuilder
This class provides access to building LLVM's passes.
Definition:CodeGenPassBuilder.h:133
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::addPostRegAlloc
void addPostRegAlloc(AddMachinePass &) const
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition:CodeGenPassBuilder.h:334
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::addILPOpts
void addILPOpts(AddMachinePass &) const
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition:CodeGenPassBuilder.h:308
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::TM
GCNTargetMachine & TM
Definition:CodeGenPassBuilder.h:271
llvm::CodeGenPassBuilder::buildPipeline
Error buildPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType) const
Definition:CodeGenPassBuilder.h:534
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::addMachineSSAOptimization
void addMachineSSAOptimization(AddMachinePass &) const
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
Definition:CodeGenPassBuilder.h:1007
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::Opt
CGPassBuilderOption Opt
Definition:CodeGenPassBuilder.h:272
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::addCodeGenPrepare
void addCodeGenPrepare(AddIRPass &) const
Add pass to prepare the LLVM IR for code generation.
Definition:CodeGenPassBuilder.h:774
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::disablePass
void disablePass()
Allow the target to disable a specific pass by default.
Definition:CodeGenPassBuilder.h:489
llvm::CodeGenPassBuilder< AMDGPUCodeGenPassBuilder, GCNTargetMachine >::addIRPasses
void addIRPasses(AddIRPass &) const
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition:CodeGenPassBuilder.h:660
llvm::CodeGenTargetMachineImpl
implements a set of functionality in the TargetMachine class for targets that make use of the indepen...
Definition:CodeGenTargetMachineImpl.h:23
llvm::CodeGenTargetMachineImpl::initAsmInfo
void initAsmInfo()
Definition:CodeGenTargetMachineImpl.cpp:45
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition:Constants.cpp:739
llvm::DeadMachineInstructionElimPass
Definition:DeadMachineInstructionElim.h:17
llvm::DummyCGSCCPass
This pass is required by interprocedural register allocation.
Definition:CallGraphSCCPass.h:122
llvm::EarlyIfConverterPass
Definition:EarlyIfConversion.h:16
llvm::EarlyMachineLICMPass
Definition:MachineLICM.h:24
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition:Error.h:160
llvm::Error::success
static ErrorSuccess success()
Create a success value.
Definition:Error.h:337
llvm::ExpandVariadicsPass
Definition:ExpandVariadics.h:26
llvm::Expected
Tagged union holding either a T or a Error.
Definition:Error.h:481
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition:Pass.h:310
llvm::Function
Definition:Function.h:63
llvm::GCNDPPCombinePass
Definition:GCNDPPCombine.h:15
llvm::GCNIterativeScheduler
Definition:GCNIterativeScheduler.h:29
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition:GCNIterativeScheduler.h:35
llvm::GCNIterativeScheduler::SCHEDULE_ILP
@ SCHEDULE_ILP
Definition:GCNIterativeScheduler.h:37
llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY
@ SCHEDULE_LEGACYMAXOCCUPANCY
Definition:GCNIterativeScheduler.h:36
llvm::GCNPostScheduleDAGMILive
Definition:GCNSchedStrategy.h:487
llvm::GCNScheduleDAGMILive
Definition:GCNSchedStrategy.h:217
llvm::GCNSubtarget
Definition:GCNSubtarget.h:34
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition:GCNSubtarget.h:291
llvm::GCNTTIImpl
Definition:AMDGPUTargetTransformInfo.h:63
llvm::GCNTargetMachine
Definition:AMDGPUTargetMachine.h:80
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) const override
Get a TargetTransformInfo implementation for the target.
Definition:AMDGPUTargetMachine.cpp:1029
llvm::GCNTargetMachine::registerMachineRegisterInfoCallback
void registerMachineRegisterInfoCallback(MachineFunction &MF) const override
Definition:AMDGPUTargetMachine.cpp:1686
llvm::GCNTargetMachine::parseMachineFunctionInfo
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML reprsentation.
Definition:AMDGPUTargetMachine.cpp:1710
llvm::GCNTargetMachine::convertFuncInfoToYAML
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
Definition:AMDGPUTargetMachine.cpp:1704
llvm::GCNTargetMachine::buildCodeGenPipeline
Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC) override
Definition:AMDGPUTargetMachine.cpp:1033
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition:AMDGPUTargetMachine.cpp:1699
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition:AMDGPUTargetMachine.cpp:1682
llvm::GCNTargetMachine::GCNTargetMachine
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOptLevel OL, bool JIT)
Definition:AMDGPUTargetMachine.cpp:998
llvm::GCNTargetMachine::createMachineFunctionInfo
MachineFunctionInfo * createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const override
Create the target's instance of MachineFunctionInfo.
Definition:AMDGPUTargetMachine.cpp:1692
llvm::GVNPass
The core GVN pass object.
Definition:GVN.h:124
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition:GlobalDCE.h:36
llvm::GlobalValue
Definition:GlobalValue.h:48
llvm::HipStdParAcceleratorCodeSelectionPass
Definition:HipStdPar.h:28
llvm::IRTranslator
Definition:IRTranslator.h:66
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition:InstructionSelect.h:35
llvm::InternalizePass
A pass that internalizes all functions and variables other than those that must be preserved accordin...
Definition:Internalize.h:36
llvm::LCSSAPass
Converts loops into loop-closed SSA form.
Definition:LCSSA.h:37
llvm::Legalizer
Definition:Legalizer.h:37
llvm::LoadStoreVectorizerPass
Definition:LoadStoreVectorizer.h:18
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition:Localizer.h:43
llvm::LoopDataPrefetchPass
An optimization pass inserting data prefetches in loops.
Definition:LoopDataPrefetch.h:23
llvm::MachineCSEPass
Definition:MachineCSE.h:16
llvm::MachineFunction
Definition:MachineFunction.h:267
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition:MachineFunction.h:733
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition:MachineFunction.h:743
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition:MachineFunction.h:831
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition:MachineRegisterInfo.h:51
llvm::MachineRegisterInfo::addDelegate
void addDelegate(Delegate *delegate)
Definition:MachineRegisterInfo.h:169
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition:MachineScheduler.h:156
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition:MemoryBuffer.h:51
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition:MemoryBuffer.h:76
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition:Module.h:65
llvm::NaryReassociatePass
Definition:NaryReassociate.h:102
llvm::OptimizationLevel
Definition:OptimizationLevel.h:22
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition:OptimizationLevel.h:41
llvm::OptimizationLevel::getSpeedupLevel
unsigned getSpeedupLevel() const
Definition:OptimizationLevel.h:121
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition:OptimizationLevel.h:57
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition:PassBuilder.h:105
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition:PassBuilder.h:482
llvm::PassBuilder::registerPipelineStartEPCallback
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition:PassBuilder.h:473
llvm::PassBuilder::crossRegisterProxies
void crossRegisterProxies(LoopAnalysisManager &LAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM, MachineFunctionAnalysisManager *MFAM=nullptr)
Cross register the analysis managers through their proxies.
Definition:PassBuilder.cpp:2211
llvm::PassBuilder::registerOptimizerLastEPCallback
void registerOptimizerLastEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition:PassBuilder.h:502
llvm::PassBuilder::registerPeepholeEPCallback
void registerPeepholeEPCallback(const std::function< void(FunctionPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition:PassBuilder.h:407
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition:PassBuilder.h:452
llvm::PassBuilder::registerRegClassFilterParsingCallback
void registerRegClassFilterParsingCallback(const std::function< RegAllocFilterFunc(StringRef)> &C)
Register callbacks to parse target specific filter field if regalloc pass needs it.
Definition:PassBuilder.h:592
llvm::PassBuilder::registerModuleAnalyses
void registerModuleAnalyses(ModuleAnalysisManager &MAM)
Registers all available module analysis passes.
Definition:PassBuilder.cpp:503
llvm::PassBuilder::registerFullLinkTimeOptimizationLastEPCallback
void registerFullLinkTimeOptimizationLastEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition:PassBuilder.h:521
llvm::PassBuilder::registerFunctionAnalyses
void registerFunctionAnalyses(FunctionAnalysisManager &FAM)
Registers all available function analysis passes.
Definition:PassBuilder.cpp:521
llvm::PassManager< Module >
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition:PassManager.h:195
llvm::PassManager::run
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)
Run all of the passes in this manager over the given unit of IR.
Definition:PassManagerImpl.h:29
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition:PassRegistry.h:37
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition:PassRegistry.cpp:24
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition:Pass.h:94
llvm::PseudoSourceValue::GOT
@ GOT
Definition:PseudoSourceValue.h:35
llvm::PseudoSourceValue::Stack
@ Stack
Definition:PseudoSourceValue.h:34
llvm::PseudoSourceValue::GlobalValueCallEntry
@ GlobalValueCallEntry
Definition:PseudoSourceValue.h:39
llvm::PseudoSourceValue::JumpTable
@ JumpTable
Definition:PseudoSourceValue.h:36
llvm::PseudoSourceValue::ExternalSymbolCallEntry
@ ExternalSymbolCallEntry
Definition:PseudoSourceValue.h:40
llvm::PseudoSourceValue::ConstantPool
@ ConstantPool
Definition:PseudoSourceValue.h:37
llvm::PseudoSourceValue::FixedStack
@ FixedStack
Definition:PseudoSourceValue.h:38
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition:RegBankSelect.h:91
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition:MachinePassRegistry.h:138
llvm::RegisterRegAllocBase
RegisterRegAllocBase class - Track the registration of register allocators.
Definition:RegAllocRegistry.h:30
llvm::RegisterRegAllocBase< RegisterRegAlloc >::FunctionPassCtor
FunctionPass *(*)() FunctionPassCtor
Definition:RegAllocRegistry.h:32
llvm::Register
Wrapper class representing virtual and physical registers.
Definition:Register.h:19
llvm::SIAnnotateControlFlowPass
Definition:AMDGPU.h:401
llvm::SIFixSGPRCopiesPass
Definition:SIFixSGPRCopies.h:16
llvm::SIFixVGPRCopiesPass
Definition:SIFixVGPRCopies.h:15
llvm::SIFoldOperandsPass
Definition:SIFoldOperands.h:15
llvm::SILoadStoreOptimizerPass
Definition:SILoadStoreOptimizer.h:17
llvm::SILowerI1CopiesPass
Definition:AMDGPU.h:87
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition:SIMachineFunctionInfo.h:390
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition:SIMachineFunctionInfo.cpp:741
llvm::SIMachineFunctionInfo::setFlag
void setFlag(Register Reg, uint8_t Flag)
Definition:SIMachineFunctionInfo.h:711
llvm::SIMachineFunctionInfo::checkFlag
bool checkFlag(Register Reg, uint8_t Flag) const
Definition:SIMachineFunctionInfo.h:717
llvm::SIMachineFunctionInfo::reserveWWMRegister
void reserveWWMRegister(Register Reg)
Definition:SIMachineFunctionInfo.h:605
llvm::SIOptimizeExecMaskingPass
Definition:SIOptimizeExecMasking.h:16
llvm::SIPeepholeSDWAPass
Definition:SIPeepholeSDWA.h:16
llvm::SIRegisterInfo
Definition:SIRegisterInfo.h:32
llvm::SIScheduleDAGMI
Definition:SIMachineScheduler.h:425
llvm::SIShrinkInstructionsPass
Definition:SIShrinkInstructions.h:17
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition:SourceMgr.h:281
llvm::SMLoc
Represents a location in source code.
Definition:SMLoc.h:23
llvm::SMRange
Represents a range in source code.
Definition:SMLoc.h:48
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition:ScheduleDAGInstrs.h:115
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition:MachineScheduler.h:407
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition:MachineScheduler.h:285
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition:MachineScheduler.h:337
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition:ScheduleDAG.h:575
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition:ScheduleDAG.h:576
llvm::SeparateConstOffsetFromGEPPass
Definition:SeparateConstOffsetFromGEP.h:17
llvm::ShadowStackGCLoweringPass
Definition:ShadowStackGCLowering.h:17
llvm::SinkingPass
Move instructions into successor blocks when possible.
Definition:Sink.h:24
llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition:SmallString.h:26
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition:SmallString.h:68
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition:SourceMgr.h:132
llvm::SourceMgr::DK_Error
@ DK_Error
Definition:SourceMgr.h:34
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition:SourceMgr.h:125
llvm::StraightLineStrengthReducePass
Definition:StraightLineStrengthReduce.h:17
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::split
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition:StringRef.h:700
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition:StringRef.h:147
llvm::StringRef::consume_front
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition:StringRef.h:635
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition:StringSwitch.h:44
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition:StringSwitch.h:69
llvm::StringSwitch::Default
R Default(T Value)
Definition:StringSwitch.h:182
llvm::StringSwitch::Cases
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition:StringSwitch.h:90
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition:TargetMachine.h:77
llvm::TargetMachine::getOptLevel
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition:TargetMachine.h:257
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition:TargetMachine.h:96
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition:TargetMachine.h:126
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition:TargetMachine.h:216
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition:TargetMachine.h:128
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition:TargetMachine.h:127
llvm::TargetMachine::STI
std::unique_ptr< const MCSubtargetInfo > STI
Definition:TargetMachine.h:109
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition:TargetMachine.cpp:129
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition:TargetMachine.h:107
llvm::TargetOptions
Definition:TargetOptions.h:132
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition:TargetPassConfig.h:85
llvm::TargetPassConfig::TM
TargetMachine * TM
Definition:TargetPassConfig.h:123
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition:TargetPassConfig.cpp:944
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition:TargetPassConfig.h:399
llvm::TargetPassConfig::addPostRegAlloc
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition:TargetPassConfig.h:446
llvm::TargetPassConfig::getOptLevel
CodeGenOptLevel getOptLevel() const
Definition:TargetPassConfig.cpp:611
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition:TargetPassConfig.cpp:1442
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition:TargetPassConfig.cpp:820
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition:TargetPassConfig.cpp:1432
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition:TargetPassConfig.cpp:1287
llvm::TargetPassConfig::disablePass
void disablePass(AnalysisID PassID)
Allow the target to disable a specific standard pass by default.
Definition:TargetPassConfig.h:221
llvm::TargetPassConfig::addPass
AnalysisID addPass(AnalysisID PassID)
Utilities for targets to add passes to the pass manager.
Definition:TargetPassConfig.cpp:748
llvm::TargetRegisterClass
Definition:TargetRegisterInfo.h:44
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition:TargetRegisterInfo.h:235
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition:TargetSubtargetInfo.h:63
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition:TargetTransformInfo.h:212
llvm::Target
Target - Wrapper for Target specific information.
Definition:TargetRegistry.h:144
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition:Triple.h:44
llvm::Triple::AMDHSA
@ AMDHSA
Definition:Triple.h:223
llvm::Triple::ArchType
ArchType
Definition:Triple.h:46
llvm::Triple::r600
@ r600
Definition:Triple.h:73
llvm::Triple::amdgcn
@ amdgcn
Definition:Triple.h:74
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition:Triple.h:395
llvm::Triple::isAMDGCN
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition:Triple.h:883
llvm::UnifyLoopExitsPass
Definition:UnifyLoopExits.h:16
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::use_empty
bool use_empty() const
Definition:Value.h:344
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition:STLFunctionalExtras.h:37
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition:LegacyPassManager.h:39
llvm::raw_pwrite_stream
An abstract base class for streams implementations that also support a pwrite operation.
Definition:raw_ostream.h:434
PassBuilder.h
Interfaces for registering analysis passes, producing common pass manager configurations,...
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition:AMDGPUAddrSpace.h:32
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition:AMDGPUAddrSpace.h:35
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition:AMDGPUAddrSpace.h:34
llvm::AMDGPUAS::UNKNOWN_ADDRESS_SPACE
@ UNKNOWN_ADDRESS_SPACE
Definition:AMDGPUAddrSpace.h:81
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition:AMDGPUAddrSpace.h:30
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition:AMDGPUAddrSpace.h:31
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition:AMDGPUAddrSpace.h:36
llvm::AMDGPU::VirtRegFlag::WWM_REG
@ WWM_REG
Definition:SIDefines.h:1068
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition:AMDGPUAddrSpace.h:86
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition:AMDGPUBaseInfo.cpp:2066
llvm::ARM_MB::ST
@ ST
Definition:ARMBaseInfo.h:73
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::PatternMatch
Definition:PatternMatch.h:47
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition:PatternMatch.h:2798
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition:PatternMatch.h:49
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition:PatternMatch.h:903
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition:PatternMatch.h:92
llvm::PatternMatch::m_Not
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
Definition:PatternMatch.h:2467
llvm::Reloc::Model
Model
Definition:CodeGen.h:25
llvm::Reloc::PIC_
@ PIC_
Definition:CodeGen.h:25
llvm::SystemZISD::TM
@ TM
Definition:SystemZISelLowering.h:66
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition:CommandLine.h:711
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition:CommandLine.h:463
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::createFlattenCFGPass
FunctionPass * createFlattenCFGPass()
Definition:FlattenCFGPass.cpp:81
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
llvm::createFastRegisterAllocator
FunctionPass * createFastRegisterAllocator()
FastRegisterAllocation Pass - This pass register allocates as fast as possible.
Definition:RegAllocFast.cpp:1874
llvm::ScanOptions::DPP
@ DPP
llvm::ScanOptions::None
@ None
llvm::ScanOptions::Iterative
@ Iterative
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition:MachineLICM.cpp:340
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition:AMDGPUAliasAnalysis.cpp:33
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition:PostRAHazardRecognizer.cpp:61
llvm::RegAllocFilterFunc
std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc
Filter function for register classes during regalloc.
Definition:RegAllocCommon.h:25
llvm::createAMDGPUSetWavePriorityPass
FunctionPass * createAMDGPUSetWavePriorityPass()
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition:LCSSA.cpp:541
llvm::initializeGCNCreateVOPDPass
void initializeGCNCreateVOPDPass(PassRegistry &)
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition:GCNPreRAOptimizations.cpp:79
llvm::GCLoweringID
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
Definition:GCRootLowering.cpp:108
llvm::initializeGCNPreRAOptimizationsPass
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
llvm::createExpandVariadicsPass
ModulePass * createExpandVariadicsPass(ExpandVariadicsMode)
llvm::initializeGCNRewritePartialRegUsesPass
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &)
llvm::initializeAMDGPUAttributorLegacyPass
void initializeAMDGPUAttributorLegacyPass(PassRegistry &)
llvm::SIPostRABundlerID
char & SIPostRABundlerID
Definition:SIPostRABundler.cpp:69
llvm::createSIAnnotateControlFlowLegacyPass
FunctionPass * createSIAnnotateControlFlowLegacyPass()
Create the annotation pass.
Definition:SIAnnotateControlFlow.cpp:451
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition:SIModeRegister.cpp:158
llvm::initializeSILowerWWMCopiesLegacyPass
void initializeSILowerWWMCopiesLegacyPass(PassRegistry &)
llvm::createGreedyRegisterAllocator
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
Definition:RegAllocGreedy.cpp:188
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::initializeSIShrinkInstructionsLegacyPass
void initializeSIShrinkInstructionsLegacyPass(PassRegistry &)
llvm::createAMDGPULowerBufferFatPointersPass
ModulePass * createAMDGPULowerBufferFatPointersPass()
Definition:AMDGPULowerBufferFatPointers.cpp:2407
llvm::initializeR600ClauseMergePassPass
void initializeR600ClauseMergePassPass(PassRegistry &)
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::createAMDGPUCtorDtorLoweringLegacyPass
ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()
llvm::createAMDGPUSwLowerLDSLegacyPass
ModulePass * createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
Definition:AMDGPUSwLowerLDS.cpp:1309
llvm::initializeAMDGPURewriteUndefForPHILegacyPass
void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &)
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition:AMDGPUPreLegalizerCombiner.cpp:300
llvm::GCNRewritePartialRegUsesID
char & GCNRewritePartialRegUsesID
Definition:GCNRewritePartialRegUses.cpp:494
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition:AMDGPUPostLegalizerCombiner.cpp:523
llvm::initializeAMDGPUSwLowerLDSLegacyPass
void initializeAMDGPUSwLowerLDSLegacyPass(PassRegistry &)
llvm::inconvertibleErrorCode
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition:Error.cpp:98
llvm::initializeGCNPreRALongBranchRegPass
void initializeGCNPreRALongBranchRegPass(PassRegistry &)
llvm::initializeSILowerSGPRSpillsLegacyPass
void initializeSILowerSGPRSpillsLegacyPass(PassRegistry &)
llvm::createIGroupLPDAGMutation
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.
Definition:AMDGPUIGroupLP.cpp:2707
llvm::initializeAMDGPUDAGToDAGISelLegacyPass
void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition:NaryReassociate.cpp:165
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition:PatchableFunction.cpp:66
llvm::SIOptimizeExecMaskingLegacyID
char & SIOptimizeExecMaskingLegacyID
Definition:SIOptimizeExecMasking.cpp:112
llvm::PostRASchedulerID
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
Definition:PostRASchedulerList.cpp:191
llvm::initializeR600ExpandSpecialInstrsPassPass
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
llvm::initializeR600PacketizerPass
void initializeR600PacketizerPass(PassRegistry &)
llvm::createVOPDPairingMutation
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
Definition:GCNVOPDUtils.cpp:189
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition:AMDGPUAlwaysInlinePass.cpp:165
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
llvm::ExpandVariadicsMode::Lowering
@ Lowering
llvm::initializeSIFoldOperandsLegacyPass
void initializeSIFoldOperandsLegacyPass(PassRegistry &)
llvm::SILoadStoreOptimizerLegacyID
char & SILoadStoreOptimizerLegacyID
Definition:SILoadStoreOptimizer.cpp:898
llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &)
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)
Definition:CSEInfo.cpp:89
llvm::getTheR600Target
Target & getTheR600Target()
The target for R600 GPUs.
Definition:AMDGPUTargetInfo.cpp:19
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition:MachineScheduler.cpp:264
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
Definition:StructurizeCFG.cpp:1336
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
Definition:MachineScheduler.cpp:295
llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass
void initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(PassRegistry &)
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
llvm::createLICMPass
Pass * createLICMPass()
Definition:LICM.cpp:381
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition:MachineScheduler.cpp:3845
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition:SIFormMemoryClauses.cpp:91
llvm::initializeSILoadStoreOptimizerLegacyPass
void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &)
llvm::initializeAMDGPULowerModuleLDSLegacyPass
void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &)
llvm::initializeAMDGPUCtorDtorLoweringLegacyPass
void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &)
llvm::EarlyIfConverterLegacyID
char & EarlyIfConverterLegacyID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition:EarlyIfConversion.cpp:800
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
llvm::initializeSILateBranchLoweringPass
void initializeSILateBranchLoweringPass(PassRegistry &)
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition:Pass.h:76
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition:AMDGPUUnifyDivergentExitNodes.cpp:88
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy)
Definition:AMDGPUAtomicOptimizer.cpp:988
llvm::createAMDGPUPreloadKernArgPrologLegacyPass
FunctionPass * createAMDGPUPreloadKernArgPrologLegacyPass()
llvm::SIOptimizeVGPRLiveRangeLegacyID
char & SIOptimizeVGPRLiveRangeLegacyID
Definition:SIOptimizeVGPRLiveRange.cpp:634
llvm::ShadowStackGCLoweringID
char & ShadowStackGCLoweringID
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.
Definition:ShadowStackGCLowering.cpp:133
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition:GCNNSAReassign.cpp:106
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
llvm::formatv
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
Definition:FormatVariadic.h:252
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(std::optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition:CodeGenTargetMachineImpl.h:80
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition:SILateBranchLowering.cpp:66
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition:BranchRelaxation.cpp:132
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition:Sink.cpp:277
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition:CGSCCPassManager.h:501
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::CodeGenFileType
CodeGenFileType
These enums are meant to be passed into addPassesToEmitFile to indicate what type of file to emit,...
Definition:CodeGen.h:83
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
llvm::GCNDPPCombineLegacyID
char & GCNDPPCombineLegacyID
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::createStoreClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
Definition:MachineScheduler.cpp:1829
llvm::createLoopDataPrefetchPass
FunctionPass * createLoopDataPrefetchPass()
Definition:LoopDataPrefetch.cpp:151
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition:AMDGPULowerKernelArguments.cpp:509
llvm::AMDGPUInsertDelayAluID
char & AMDGPUInsertDelayAluID
Definition:AMDGPUInsertDelayAlu.cpp:461
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition:AMDGPUAnnotateKernelFeatures.cpp:135
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition:Error.cpp:167
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition:AMDGPUMacroFusion.cpp:62
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition:StackMapLivenessAnalysis.cpp:86
llvm::SILowerWWMCopiesLegacyID
char & SILowerWWMCopiesLegacyID
Definition:SILowerWWMCopies.cpp:84
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition:UnifyLoopExits.cpp:60
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition:SIOptimizeExecMaskingPreRA.cpp:75
llvm::createFixIrreduciblePass
FunctionPass * createFixIrreduciblePass()
Definition:FixIrreducible.cpp:119
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition:FuncletLayout.cpp:39
llvm::initializeSIInsertHardClausesPass
void initializeSIInsertHardClausesPass(PassRegistry &)
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition:DetectDeadLanes.cpp:413
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::CodeGenOptLevel
CodeGenOptLevel
Code generation optimization level.
Definition:CodeGen.h:54
llvm::CodeGenOptLevel::Less
@ Less
-O1
llvm::CodeGenOptLevel::Aggressive
@ Aggressive
-O3
llvm::CodeGenOptLevel::None
@ None
-O0
llvm::initializeAMDGPUReserveWWMRegsPass
void initializeAMDGPUReserveWWMRegsPass(PassRegistry &)
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition:AMDGPUPrintfRuntimeBinding.cpp:80
llvm::StackSlotColoringID
char & StackSlotColoringID
StackSlotColoring - This pass performs stack slot coloring.
Definition:StackSlotColoring.cpp:183
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
llvm::createAlwaysInlinerLegacyPass
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
Definition:AlwaysInliner.cpp:164
llvm::initializeR600ControlFlowFinalizerPass
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
llvm::initializeAMDGPUImageIntrinsicOptimizerPass
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &)
llvm::initializeSILowerControlFlowLegacyPass
void initializeSILowerControlFlowLegacyPass(PassRegistry &)
llvm::SIPreAllocateWWMRegsLegacyID
char & SIPreAllocateWWMRegsLegacyID
Definition:SIPreAllocateWWMRegs.cpp:90
llvm::createAMDGPULowerModuleLDSLegacyPass
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
Definition:AMDGPULowerModuleLDSPass.cpp:1540
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::AMDGPUReserveWWMRegsID
char & AMDGPUReserveWWMRegsID
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition:AMDGPUPromoteAlloca.cpp:244
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition:SeparateConstOffsetFromGEP.cpp:507
llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID
llvm::createAMDGPURemoveIncompatibleFunctionsPass
ModulePass * createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *)
llvm::initializeGCNRegPressurePrinterPass
void initializeGCNRegPressurePrinterPass(PassRegistry &)
llvm::initializeSILowerI1CopiesLegacyPass
void initializeSILowerI1CopiesLegacyPass(PassRegistry &)
llvm::SILowerSGPRSpillsLegacyID
char & SILowerSGPRSpillsLegacyID
Definition:SILowerSGPRSpills.cpp:102
llvm::initializeAMDGPUArgumentUsageInfoPass
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
llvm::createBasicRegisterAllocator
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
Definition:RegAllocBasic.cpp:340
llvm::initializeGlobalISel
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition:GlobalISel.cpp:17
llvm::SILowerControlFlowLegacyID
char & SILowerControlFlowLegacyID
Definition:SILowerControlFlow.cpp:183
llvm::createR600OpenCLImageTypeLoweringPass
ModulePass * createR600OpenCLImageTypeLoweringPass()
Definition:R600OpenCLImageTypeLoweringPass.cpp:372
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition:AMDGPUCodeGenPrepare.cpp:2328
llvm::initializeSIAnnotateControlFlowLegacyPass
void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &)
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass()
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition:AMDGPUISelDAGToDAG.cpp:115
llvm::initializeSIPreAllocateWWMRegsLegacyPass
void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &)
llvm::initializeSIFixVGPRCopiesLegacyPass
void initializeSIFixVGPRCopiesLegacyPass(PassRegistry &)
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition:AMDGPUTargetInfo.cpp:25
llvm::initializeSIFixSGPRCopiesLegacyPass
void initializeSIFixSGPRCopiesLegacyPass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::createGVNPass
FunctionPass * createGVNPass()
Create a legacy GVN pass.
Definition:GVN.cpp:3375
llvm::createAMDGPURegBankSelectPass
FunctionPass * createAMDGPURegBankSelectPass()
Definition:AMDGPURegBankSelect.cpp:77
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition:AMDGPURegBankCombiner.cpp:477
llvm::createAMDGPURegBankLegalizePass
FunctionPass * createAMDGPURegBankLegalizePass()
Definition:AMDGPURegBankLegalize.cpp:82
llvm::MachineCSELegacyID
char & MachineCSELegacyID
MachineCSE - This pass performs global CSE on machine instructions.
Definition:MachineCSE.cpp:164
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition:SIWholeQuadMode.cpp:268
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
Definition:MachineScheduler.cpp:1820
llvm::initializeSIOptimizeExecMaskingPreRAPass
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
llvm::initializeAMDGPUMarkLastScratchLoadPass
void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &)
llvm::LiveVariablesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
Definition:LiveVariables.cpp:61
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::createAMDGPURewriteUndefForPHILegacyPass
FunctionPass * createAMDGPURewriteUndefForPHILegacyPass()
Definition:AMDGPURewriteUndefForPHI.cpp:193
llvm::initializeSIOptimizeExecMaskingLegacyPass
void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &)
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition:Threading.h:86
llvm::createSILowerI1CopiesLegacyPass
FunctionPass * createSILowerI1CopiesLegacyPass()
Definition:SILowerI1Copies.cpp:936
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition:SIInsertHardClauses.cpp:272
llvm::initializeAMDGPUResourceUsageAnalysisPass
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
llvm::SIFixSGPRCopiesLegacyID
char & SIFixSGPRCopiesLegacyID
Definition:SIFixSGPRCopies.cpp:194
llvm::initializeGCNDPPCombineLegacyPass
void initializeGCNDPPCombineLegacyPass(PassRegistry &)
llvm::GCNCreateVOPDID
char & GCNCreateVOPDID
Definition:GCNCreateVOPD.cpp:170
llvm::createInferAddressSpacesPass
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
Definition:InferAddressSpaces.cpp:1412
llvm::SIPeepholeSDWALegacyID
char & SIPeepholeSDWALegacyID
llvm::VirtRegRewriterID
char & VirtRegRewriterID
VirtRegRewriter pass.
Definition:VirtRegMap.cpp:250
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
llvm::SIFoldOperandsLegacyID
char & SIFoldOperandsLegacyID
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition:LowerSwitch.cpp:592
llvm::initializeAMDGPUPreloadKernArgPrologLegacyPass
void initializeAMDGPUPreloadKernArgPrologLegacyPass(PassRegistry &)
llvm::createVirtRegRewriter
FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)
Definition:VirtRegMap.cpp:734
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
llvm::createAMDGPUGlobalISelDivergenceLoweringPass
FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()
Definition:AMDGPUGlobalISelDivergenceLowering.cpp:206
llvm::createSIMemoryLegalizerPass
FunctionPass * createSIMemoryLegalizerPass()
Definition:SIMemoryLegalizer.cpp:2841
llvm::initializeAMDGPULateCodeGenPrepareLegacyPass
void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &)
llvm::initializeSIOptimizeVGPRLiveRangeLegacyPass
void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &)
llvm::initializeSIPeepholeSDWALegacyPass
void initializeSIPeepholeSDWALegacyPass(PassRegistry &)
llvm::initializeAMDGPURegBankLegalizePass
void initializeAMDGPURegBankLegalizePass(PassRegistry &)
llvm::TwoAddressInstructionPassID
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
Definition:TwoAddressInstructionPass.cpp:258
llvm::initializeAMDGPURegBankSelectPass
void initializeAMDGPURegBankSelectPass(PassRegistry &)
llvm::createAMDGPULateCodeGenPrepareLegacyPass
FunctionPass * createAMDGPULateCodeGenPrepareLegacyPass()
Definition:AMDGPULateCodeGenPrepare.cpp:540
llvm::createAtomicExpandLegacyPass
FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
Definition:AtomicExpandPass.cpp:417
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition:AMDGPUMCTargetDesc.cpp:70
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition:StraightLineStrengthReduce.cpp:268
llvm::createAMDGPUImageIntrinsicOptimizerPass
FunctionPass * createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *)
Definition:AMDGPUImageIntrinsicOptimizer.cpp:325
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::initializeAMDGPULowerBufferFatPointersPass
void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition:SIInsertWaitcnts.cpp:1133
llvm::createAMDGPUAnnotateUniformValuesLegacy
FunctionPass * createAMDGPUAnnotateUniformValuesLegacy()
Definition:AMDGPUAnnotateUniformValues.cpp:149
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition:EarlyCSE.cpp:1944
llvm::PHIEliminationID
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
Definition:PHIElimination.cpp:186
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition:MIParser.cpp:3609
llvm::createSIShrinkInstructionsLegacyPass
FunctionPass * createSIShrinkInstructionsLegacyPass()
llvm::AMDGPUMarkLastScratchLoadID
char & AMDGPUMarkLastScratchLoadID
Definition:AMDGPUMarkLastScratchLoad.cpp:135
llvm::RenameIndependentSubregsID
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Definition:RenameIndependentSubregs.cpp:113
llvm::initializeAMDGPUAnnotateUniformValuesLegacyPass
void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &)
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition:AMDGPUExportClustering.cpp:144
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaPass
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
llvm::initializeAMDGPURemoveIncompatibleFunctionsLegacyPass
void initializeAMDGPURemoveIncompatibleFunctionsLegacyPass(PassRegistry &)
llvm::initializeAMDGPUInsertDelayAluPass
void initializeAMDGPUInsertDelayAluPass(PassRegistry &)
llvm::initializeAMDGPUUnifyMetadataPass
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
llvm::initializeAMDGPUAlwaysInlinePass
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
llvm::Wave32
@ Wave32
Definition:AMDGPUMCTargetDesc.h:32
llvm::Wave64
@ Wave64
Definition:AMDGPUMCTargetDesc.h:32
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition:DeadMachineInstructionElim.cpp:76
llvm::AMDGPUPerfHintAnalysisLegacyID
char & AMDGPUPerfHintAnalysisLegacyID
Definition:AMDGPUPerfHintAnalysis.cpp:468
llvm::GCNPreRALongBranchRegID
char & GCNPreRALongBranchRegID
llvm::initializeAMDGPUPromoteKernelArgumentsPass
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
N
#define N
llvm::AMDGPUAlwaysInlinePass
Definition:AMDGPU.h:277
llvm::AMDGPUAtomicOptimizerPass
Definition:AMDGPU.h:263
llvm::AMDGPUAttributorOptions
Definition:AMDGPU.h:326
llvm::AMDGPUAttributorOptions::IsClosedWorld
bool IsClosedWorld
Definition:AMDGPU.h:327
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition:AMDGPUArgumentUsageInfo.h:134
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition:AMDGPUArgumentUsageInfo.h:145
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition:AMDGPUArgumentUsageInfo.h:146
llvm::AMDGPUFunctionArgInfo::PrivateSegmentSize
ArgDescriptor PrivateSegmentSize
Definition:AMDGPUArgumentUsageInfo.h:140
llvm::AMDGPUFunctionArgInfo::DispatchID
ArgDescriptor DispatchID
Definition:AMDGPUArgumentUsageInfo.h:138
llvm::AMDGPUFunctionArgInfo::ImplicitArgPtr
ArgDescriptor ImplicitArgPtr
Definition:AMDGPUArgumentUsageInfo.h:152
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition:AMDGPUArgumentUsageInfo.h:148
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition:AMDGPUArgumentUsageInfo.h:147
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition:AMDGPUArgumentUsageInfo.h:161
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition:AMDGPUArgumentUsageInfo.h:160
llvm::AMDGPUFunctionArgInfo::LDSKernelId
ArgDescriptor LDSKernelId
Definition:AMDGPUArgumentUsageInfo.h:141
llvm::AMDGPUFunctionArgInfo::QueuePtr
ArgDescriptor QueuePtr
Definition:AMDGPUArgumentUsageInfo.h:136
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition:AMDGPUArgumentUsageInfo.h:137
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition:AMDGPUArgumentUsageInfo.h:159
llvm::AMDGPUFunctionArgInfo::FlatScratchInit
ArgDescriptor FlatScratchInit
Definition:AMDGPUArgumentUsageInfo.h:139
llvm::AMDGPUFunctionArgInfo::DispatchPtr
ArgDescriptor DispatchPtr
Definition:AMDGPUArgumentUsageInfo.h:135
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition:AMDGPUArgumentUsageInfo.h:155
llvm::AMDGPUFunctionArgInfo::WorkGroupIDX
ArgDescriptor WorkGroupIDX
Definition:AMDGPUArgumentUsageInfo.h:144
llvm::AMDGPUImageIntrinsicOptimizerPass
Definition:AMDGPU.h:75
llvm::AMDGPULowerBufferFatPointersPass
Definition:AMDGPU.h:151
llvm::AMDGPULowerKernelAttributesPass
Definition:AMDGPU.h:133
llvm::AMDGPULowerModuleLDSPass
Definition:AMDGPU.h:140
llvm::AMDGPUPerfHintAnalysisPass
Definition:AMDGPUPerfHintAnalysis.h:65
llvm::AMDGPUPrintfRuntimeBindingPass
Definition:AMDGPU.h:359
llvm::AMDGPUPromoteAllocaPass
Definition:AMDGPU.h:246
llvm::AMDGPUPromoteAllocaToVectorPass
Definition:AMDGPU.h:255
llvm::AMDGPUPromoteKernelArgumentsPass
Definition:AMDGPU.h:124
llvm::AMDGPUSimplifyLibCallsPass
Definition:AMDGPU.h:69
llvm::AMDGPUSwLowerLDSPass
Definition:AMDGPU.h:290
llvm::AMDGPUUnifyMetadataPass
Definition:AMDGPU.h:367
llvm::AMDGPUUseNativeCallsPass
Definition:AMDGPU.h:83
llvm::ArgDescriptor
Definition:AMDGPUArgumentUsageInfo.h:25
llvm::ArgDescriptor::createStack
static ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Definition:AMDGPUArgumentUsageInfo.h:50
llvm::ArgDescriptor::createArg
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition:AMDGPUArgumentUsageInfo.h:54
llvm::ArgDescriptor::createRegister
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition:AMDGPUArgumentUsageInfo.h:46
llvm::CGPassBuilderOption
Definition:CGPassBuilderOption.h:27
llvm::CGPassBuilderOption::RequiresCodeGenSCCOrder
bool RequiresCodeGenSCCOrder
Definition:CGPassBuilderOption.h:52
llvm::DenormalMode::Input
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
Definition:FloatingPointMode.h:96
llvm::DenormalMode::PreserveSign
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
Definition:FloatingPointMode.h:80
llvm::DenormalMode::IEEE
@ IEEE
IEEE-754 denormal numbers preserved.
Definition:FloatingPointMode.h:77
llvm::DenormalMode::Output
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
Definition:FloatingPointMode.h:91
llvm::EarlyCSEPass
A simple and fast domtree-based CSE pass.
Definition:EarlyCSE.h:30
llvm::FixIrreduciblePass
Definition:FixIrreducible.h:15
llvm::FlattenCFGPass
Definition:FlattenCFG.h:20
llvm::InferAddressSpacesPass
Definition:InferAddressSpaces.h:16
llvm::LowerSwitchPass
Definition:LowerSwitch.h:21
llvm::MachineFunctionInfo
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
Definition:MachineFunction.h:104
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition:MachineScheduler.h:136
llvm::PassInstrumentationCallbacks
This class manages callbacks registration, as well as provides a way for PassInstrumentation to pass ...
Definition:PassInstrumentation.h:75
llvm::PerFunctionMIParsingState
Definition:MIParser.h:165
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition:MIParser.h:168
llvm::PerFunctionMIParsingState::VRegInfosNamed
StringMap< VRegInfo * > VRegInfosNamed
Definition:MIParser.h:177
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition:MIParser.h:167
llvm::PerFunctionMIParsingState::VRegInfos
DenseMap< Register, VRegInfo * > VRegInfos
Definition:MIParser.h:176
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition:TargetRegistry.h:1248
llvm::RequireAnalysisPass
A utility pass template to force an analysis result to be available.
Definition:PassManager.h:878
llvm::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition:SIModeRegisterDefaults.h:29
llvm::SIModeRegisterDefaults::FP64FP16Denormals
DenormalMode FP64FP16Denormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition:SIModeRegisterDefaults.h:37
llvm::SIModeRegisterDefaults::IEEE
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Definition:SIModeRegisterDefaults.h:25
llvm::SIModeRegisterDefaults::FP32Denormals
DenormalMode FP32Denormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Definition:SIModeRegisterDefaults.h:33
llvm::StructurizeCFGPass
Definition:StructurizeCFG.h:15
llvm::cl::desc
Definition:CommandLine.h:409
llvm::once_flag
The llvm::once_flag structure.
Definition:Threading.h:67
llvm::yaml::MachineFunctionInfo
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Definition:MIRYamlMapping.h:732
llvm::yaml::SIMachineFunctionInfo
Definition:SIMachineFunctionInfo.h:260
llvm::yaml::SIMachineFunctionInfo::SGPRForEXECCopy
StringValue SGPRForEXECCopy
Definition:SIMachineFunctionInfo.h:297
llvm::yaml::SIMachineFunctionInfo::WWMReservedRegs
SmallVector< StringValue > WWMReservedRegs
Definition:SIMachineFunctionInfo.h:279
llvm::yaml::SIMachineFunctionInfo::FrameOffsetReg
StringValue FrameOffsetReg
Definition:SIMachineFunctionInfo.h:282
llvm::yaml::SIMachineFunctionInfo::LongBranchReservedReg
StringValue LongBranchReservedReg
Definition:SIMachineFunctionInfo.h:298
llvm::yaml::SIMachineFunctionInfo::VGPRForAGPRCopy
StringValue VGPRForAGPRCopy
Definition:SIMachineFunctionInfo.h:296
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition:SIMachineFunctionInfo.h:294
llvm::yaml::SIMachineFunctionInfo::HasInitWholeWave
bool HasInitWholeWave
Definition:SIMachineFunctionInfo.h:300
llvm::yaml::SIMachineFunctionInfo::ArgInfo
std::optional< SIArgumentInfo > ArgInfo
Definition:SIMachineFunctionInfo.h:288
llvm::yaml::SIMachineFunctionInfo::SpillPhysVGPRS
SmallVector< StringValue, 2 > SpillPhysVGPRS
Definition:SIMachineFunctionInfo.h:278
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition:SIMachineFunctionInfo.h:281
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition:SIMachineFunctionInfo.h:283
llvm::yaml::SIMode::IEEE
bool IEEE
Definition:SIMachineFunctionInfo.h:218
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition:SIMachineFunctionInfo.h:219
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition:SIMachineFunctionInfo.h:223
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition:SIMachineFunctionInfo.h:222
llvm::yaml::SIMode::FP32OutputDenormals
bool FP32OutputDenormals
Definition:SIMachineFunctionInfo.h:221
llvm::yaml::SIMode::FP32InputDenormals
bool FP32InputDenormals
Definition:SIMachineFunctionInfo.h:220
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition:MIRYamlMapping.h:34

Generated on Sun Jul 20 2025 11:15:49 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp