LLVM 20.0.0git
RISCVISelLowering.cpp
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
15#include "MCTargetDesc/RISCVMatInt.h"
16#include "RISCV.h"
17#include "RISCVConstantPoolValue.h"
18#include "RISCVMachineFunctionInfo.h"
19#include "RISCVRegisterInfo.h"
20#include "RISCVSelectionDAGInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/MemoryLocation.h"
25#include "llvm/Analysis/VectorUtils.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineJumpTableInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
32#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/IR/DiagnosticInfo.h"
35#include "llvm/IR/DiagnosticPrinter.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/PatternMatch.h"
40#include "llvm/MC/MCCodeEmitter.h"
41#include "llvm/MC/MCInstBuilder.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/InstructionCost.h"
46#include "llvm/Support/KnownBits.h"
47#include "llvm/Support/MathExtras.h"
48#include "llvm/Support/raw_ostream.h"
49#include <optional>
50
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "riscv-lower"
54 
55 STATISTIC(NumTailCalls, "Number of tail calls");
56 
57 static cl::opt<unsigned> ExtensionMaxWebSize(
58     DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59     cl::desc("Give the maximum size (in number of nodes) of the web of "
60              "instructions that we will consider for VW expansion"),
61     cl::init(18));
62 
63 static cl::opt<bool>
64     AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65                      cl::desc("Allow the formation of VW_W operations (e.g., "
66                               "VWADD_W) with splat constants"),
67                      cl::init(false));
68 
69 static cl::opt<unsigned> NumRepeatedDivisors(
70     DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71     cl::desc("Set the minimum number of repetitions of a divisor to allow "
72              "transformation to multiplications by the reciprocal"),
73     cl::init(2));
74 
75 static cl::opt<int>
76     FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77               cl::desc("Give the maximum number of instructions that we will "
78                        "use for creating a floating-point immediate value"),
79               cl::init(2));
80 
81 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82                                          const RISCVSubtarget &STI)
83     : TargetLowering(TM), Subtarget(STI) {
84 
85   RISCVABI::ABI ABI = Subtarget.getTargetABI();
86   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87 
88   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89       !Subtarget.hasStdExtF()) {
90     errs() << "Hard-float 'f' ABI can't be used for a target that "
91                "doesn't support the F instruction set extension (ignoring "
92                "target-abi)\n";
93     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
94   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95              !Subtarget.hasStdExtD()) {
96     errs() << "Hard-float 'd' ABI can't be used for a target that "
97                "doesn't support the D instruction set extension (ignoring "
98                "target-abi)\n";
99     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
100   }
101
102switch (ABI) {
103default:
104report_fatal_error("Don't know how to lower this ABI");
105caseRISCVABI::ABI_ILP32:
106caseRISCVABI::ABI_ILP32E:
107caseRISCVABI::ABI_LP64E:
108caseRISCVABI::ABI_ILP32F:
109caseRISCVABI::ABI_ILP32D:
110caseRISCVABI::ABI_LP64:
111caseRISCVABI::ABI_LP64F:
112caseRISCVABI::ABI_LP64D:
113break;
114 }
115
116MVT XLenVT = Subtarget.getXLenVT();
117
118// Set up the register classes.
119addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121if (Subtarget.hasStdExtZfhmin())
122addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123if (Subtarget.hasStdExtZfbfmin())
124addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125if (Subtarget.hasStdExtF())
126addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127if (Subtarget.hasStdExtD())
128addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129if (Subtarget.hasStdExtZhinxmin())
130addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131if (Subtarget.hasStdExtZfinx())
132addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133if (Subtarget.hasStdExtZdinx()) {
134if (Subtarget.is64Bit())
135addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136else
137addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172if (Subtarget.hasVInstructions()) {
173auto addRegClassForRVV = [this](MVT VT) {
174// Disable the smallest fractional LMUL types if ELEN is less than
175// RVVBitsPerBlock.
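// Illustrative example (assumed values, for exposition only): with ELEN=32 and
// RVVBitsPerBlock=64, MinElts below evaluates to 2, so the nxv1 fractional
// types (e.g. nxv1i8) are skipped and never receive a register class.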
176unsigned MinElts =RISCV::RVVBitsPerBlock / Subtarget.getELen();
177if (VT.getVectorMinNumElements() < MinElts)
178return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181constTargetRegisterClass *RC;
182if (Size <=RISCV::RVVBitsPerBlock)
183 RC = &RISCV::VRRegClass;
184elseif (Size == 2 *RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186elseif (Size == 4 *RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188elseif (Size == 8 *RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190else
191llvm_unreachable("Unexpected size");
192
193addRegisterClass(VT, RC);
194 };
195
196for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198for (MVT VT : IntVecVTs) {
199if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201continue;
202 addRegClassForRVV(VT);
203 }
204
205if (Subtarget.hasVInstructionsF16Minimal())
206for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209if (Subtarget.hasVInstructionsBF16Minimal())
210for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213if (Subtarget.hasVInstructionsF32())
214for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217if (Subtarget.hasVInstructionsF64())
218for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221if (Subtarget.useRVVForFixedLengthVectors()) {
222auto addRegClassForFixedVectors = [this](MVT VT) {
223MVT ContainerVT =getContainerForFixedLengthVector(VT);
224unsigned RCID =getRegClassIDForVecVT(ContainerVT);
225constRISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226addRegisterClass(VT,TRI.getRegClass(RCID));
227 };
228for (MVT VT :MVT::integer_fixedlen_vector_valuetypes())
229if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
232for (MVT VT :MVT::fp_fixedlen_vector_valuetypes())
233if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271// Compute derived properties from the register classes.
272computeRegisterProperties(STI.getRegisterInfo());
273
274setStackPointerRegisterToSaveRestore(RISCV::X2);
275
276setLoadExtAction({ISD::EXTLOAD,ISD::SEXTLOAD,ISD::ZEXTLOAD}, XLenVT,
277 MVT::i1,Promote);
278// DAGCombiner can call isLoadExtLegal for types that aren't legal.
279setLoadExtAction({ISD::EXTLOAD,ISD::SEXTLOAD,ISD::ZEXTLOAD}, MVT::i32,
280 MVT::i1,Promote);
281
282// TODO: add all necessary setOperationAction calls.
283setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT,Custom);
284
285setOperationAction(ISD::BR_JT, MVT::Other,Expand);
286setOperationAction(ISD::BR_CC, XLenVT,Expand);
287setOperationAction(ISD::BRCOND, MVT::Other,Custom);
288setOperationAction(ISD::SELECT_CC, XLenVT,Expand);
289
290setCondCodeAction(ISD::SETGT, XLenVT,Custom);
291setCondCodeAction(ISD::SETGE, XLenVT,Expand);
292setCondCodeAction(ISD::SETUGT, XLenVT,Custom);
293setCondCodeAction(ISD::SETUGE, XLenVT,Expand);
294if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
295setCondCodeAction(ISD::SETULE, XLenVT,Expand);
296setCondCodeAction(ISD::SETLE, XLenVT,Expand);
297 }
298
299setOperationAction({ISD::STACKSAVE,ISD::STACKRESTORE}, MVT::Other,Expand);
300
301setOperationAction(ISD::VASTART, MVT::Other,Custom);
302setOperationAction({ISD::VAARG,ISD::VACOPY,ISD::VAEND}, MVT::Other,Expand);
303
304if (!Subtarget.hasVendorXTHeadBb())
305setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1,Expand);
306
307setOperationAction(ISD::EH_DWARF_CFA, MVT::i32,Custom);
308
309if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16},Expand);
312
313if (Subtarget.is64Bit()) {
314setOperationAction(ISD::EH_DWARF_CFA, MVT::i64,Custom);
315
316setOperationAction(ISD::LOAD, MVT::i32,Custom);
317setOperationAction({ISD::ADD,ISD::SUB,ISD::SHL,ISD::SRA,ISD::SRL},
318 MVT::i32,Custom);
319setOperationAction({ISD::UADDO,ISD::USUBO}, MVT::i32,Custom);
320if (!Subtarget.hasStdExtZbb())
321setOperationAction(
322 {ISD::SADDSAT,ISD::SSUBSAT,ISD::UADDSAT,ISD::USUBSAT}, MVT::i32,
323Custom);
324setOperationAction(ISD::SADDO, MVT::i32,Custom);
325 }
326if (!Subtarget.hasStdExtZmmul()) {
327setOperationAction({ISD::MUL,ISD::MULHS,ISD::MULHU}, XLenVT,Expand);
328 }elseif (Subtarget.is64Bit()) {
329setOperationAction(ISD::MUL, MVT::i128,Custom);
330setOperationAction(ISD::MUL, MVT::i32,Custom);
331 }else {
332setOperationAction(ISD::MUL, MVT::i64,Custom);
333 }
334
335if (!Subtarget.hasStdExtM()) {
336setOperationAction({ISD::SDIV,ISD::UDIV,ISD::SREM,ISD::UREM}, XLenVT,
337Expand);
338 }elseif (Subtarget.is64Bit()) {
339setOperationAction({ISD::SDIV,ISD::UDIV,ISD::UREM},
340 {MVT::i8, MVT::i16, MVT::i32},Custom);
341 }
342
343setOperationAction(
344 {ISD::SDIVREM,ISD::UDIVREM,ISD::SMUL_LOHI,ISD::UMUL_LOHI}, XLenVT,
345Expand);
346
347setOperationAction({ISD::SHL_PARTS,ISD::SRL_PARTS,ISD::SRA_PARTS}, XLenVT,
348Custom);
349
350if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351if (Subtarget.is64Bit())
352setOperationAction({ISD::ROTL,ISD::ROTR}, MVT::i32,Custom);
353 }elseif (Subtarget.hasVendorXTHeadBb()) {
354if (Subtarget.is64Bit())
355setOperationAction({ISD::ROTL,ISD::ROTR}, MVT::i32,Custom);
356setOperationAction({ISD::ROTL,ISD::ROTR}, XLenVT,Custom);
357 }elseif (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
358setOperationAction(ISD::ROTL, XLenVT,Expand);
359 }else {
360setOperationAction({ISD::ROTL,ISD::ROTR}, XLenVT,Expand);
361 }
362
363// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364// pattern match it directly in isel.
365setOperationAction(ISD::BSWAP, XLenVT,
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ?Legal
369 :Expand);
370
371if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
372setOperationAction(ISD::BITREVERSE, XLenVT,Legal);
373 }else {
374// Zbkb can use rev8+brev8 to implement bitreverse.
375setOperationAction(ISD::BITREVERSE, XLenVT,
376 Subtarget.hasStdExtZbkb() ?Custom :Expand);
377 }
378
379if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
381setOperationAction({ISD::SMIN,ISD::SMAX,ISD::UMIN,ISD::UMAX}, XLenVT,
382Legal);
383 }
384
385if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387if (Subtarget.is64Bit())
388setOperationAction({ISD::CTTZ,ISD::CTTZ_ZERO_UNDEF}, MVT::i32,Custom);
389 }else {
390setOperationAction({ISD::CTTZ,ISD::CTPOP}, XLenVT,Expand);
391 }
392
393if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395// We need the custom lowering to make sure that the resulting sequence
396// for the 32bit case is efficient on 64bit targets.
397if (Subtarget.is64Bit())
398setOperationAction({ISD::CTLZ,ISD::CTLZ_ZERO_UNDEF}, MVT::i32,Custom);
399 }else {
400setOperationAction(ISD::CTLZ, XLenVT,Expand);
401 }
402
403if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
404setOperationAction(ISD::ABS, XLenVT,Legal);
405 }elseif (Subtarget.hasShortForwardBranchOpt()) {
406// We can use PseudoCCSUB to implement ABS.
407setOperationAction(ISD::ABS, XLenVT,Legal);
408 }elseif (Subtarget.is64Bit()) {
409setOperationAction(ISD::ABS, MVT::i32,Custom);
410 }
411
412if (Subtarget.useCCMovInsn())
413setOperationAction(ISD::SELECT, XLenVT,Legal);
414elseif (!Subtarget.hasVendorXTHeadCondMov())
415setOperationAction(ISD::SELECT, XLenVT,Custom);
416
417 static const unsigned FPLegalNodeTypes[] = {
418ISD::FMINNUM,ISD::FMAXNUM,ISD::FMINIMUMNUM,
419ISD::FMAXIMUMNUM,ISD::LRINT,ISD::LLRINT,
420ISD::LROUND,ISD::LLROUND,ISD::STRICT_LRINT,
421ISD::STRICT_LLRINT,ISD::STRICT_LROUND,ISD::STRICT_LLROUND,
422ISD::STRICT_FMA,ISD::STRICT_FADD,ISD::STRICT_FSUB,
423ISD::STRICT_FMUL,ISD::STRICT_FDIV,ISD::STRICT_FSQRT,
424ISD::STRICT_FSETCC,ISD::STRICT_FSETCCS,ISD::FCANONICALIZE};
425
426 static const ISD::CondCode FPCCToExpand[] = {
427ISD::SETOGT,ISD::SETOGE,ISD::SETONE,ISD::SETUEQ,ISD::SETUGT,
428ISD::SETUGE,ISD::SETULT,ISD::SETULE,ISD::SETUNE,ISD::SETGT,
429ISD::SETGE,ISD::SETNE,ISD::SETO,ISD::SETUO};
430
431 static const unsigned FPOpToExpand[] = {
432ISD::FSIN,ISD::FCOS,ISD::FSINCOS,ISD::FPOW,
433ISD::FREM};
434
435 static const unsigned FPRndMode[] = {
436ISD::FCEIL,ISD::FFLOOR,ISD::FTRUNC,ISD::FRINT,ISD::FROUND,
437ISD::FROUNDEVEN};
438
439 static const unsigned ZfhminZfbfminPromoteOps[] = {
440ISD::FMINNUM,ISD::FMAXNUM,ISD::FMAXIMUMNUM,
441ISD::FMINIMUMNUM,ISD::FADD,ISD::FSUB,
442ISD::FMUL,ISD::FMA,ISD::FDIV,
443ISD::FSQRT,ISD::STRICT_FMA,ISD::STRICT_FADD,
444ISD::STRICT_FSUB,ISD::STRICT_FMUL,ISD::STRICT_FDIV,
445ISD::STRICT_FSQRT,ISD::STRICT_FSETCC,ISD::STRICT_FSETCCS,
446ISD::SETCC,ISD::FCEIL,ISD::FFLOOR,
447ISD::FTRUNC,ISD::FRINT,ISD::FROUND,
448ISD::FROUNDEVEN,ISD::FCANONICALIZE};
449
450if (Subtarget.hasStdExtZfbfmin()) {
451setOperationAction(ISD::BITCAST, MVT::i16,Custom);
452setOperationAction(ISD::ConstantFP, MVT::bf16,Expand);
453setOperationAction(ISD::SELECT_CC, MVT::bf16,Expand);
454setOperationAction(ISD::SELECT, MVT::bf16,Custom);
455setOperationAction(ISD::BR_CC, MVT::bf16,Expand);
456setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16,Promote);
457setOperationAction(ISD::FREM, MVT::bf16,Promote);
458setOperationAction(ISD::FABS, MVT::bf16,Custom);
459setOperationAction(ISD::FNEG, MVT::bf16,Custom);
460setOperationAction(ISD::FCOPYSIGN, MVT::bf16,Custom);
461setOperationAction({ISD::FP_TO_SINT,ISD::FP_TO_UINT}, XLenVT,Custom);
462setOperationAction({ISD::SINT_TO_FP,ISD::UINT_TO_FP}, XLenVT,Custom);
463 }
464
465if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
466if (Subtarget.hasStdExtZfhOrZhinx()) {
467setOperationAction(FPLegalNodeTypes, MVT::f16,Legal);
468setOperationAction(FPRndMode, MVT::f16,
469 Subtarget.hasStdExtZfa() ?Legal :Custom);
470setOperationAction(ISD::IS_FPCLASS, MVT::f16,Custom);
471setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, MVT::f16,
472 Subtarget.hasStdExtZfa() ?Legal :Custom);
473if (Subtarget.hasStdExtZfa())
474setOperationAction(ISD::ConstantFP, MVT::f16,Custom);
475 }else {
476setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16,Promote);
477setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, MVT::f16,Promote);
478 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
479ISD::STRICT_LROUND,ISD::STRICT_LLROUND,
480ISD::STRICT_LRINT,ISD::STRICT_LLRINT})
481setOperationAction(Op, MVT::f16,Custom);
482setOperationAction(ISD::FABS, MVT::f16,Custom);
483setOperationAction(ISD::FNEG, MVT::f16,Custom);
484setOperationAction(ISD::FCOPYSIGN, MVT::f16,Custom);
485setOperationAction({ISD::FP_TO_SINT,ISD::FP_TO_UINT}, XLenVT,Custom);
486setOperationAction({ISD::SINT_TO_FP,ISD::UINT_TO_FP}, XLenVT,Custom);
487 }
488
489setOperationAction(ISD::BITCAST, MVT::i16,Custom);
490
491setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16,Legal);
492setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32,Legal);
493setCondCodeAction(FPCCToExpand, MVT::f16,Expand);
494setOperationAction(ISD::SELECT_CC, MVT::f16,Expand);
495setOperationAction(ISD::SELECT, MVT::f16,Custom);
496setOperationAction(ISD::BR_CC, MVT::f16,Expand);
497
498setOperationAction(
499ISD::FNEARBYINT, MVT::f16,
500 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ?Legal :Promote);
501setOperationAction({ISD::FREM,ISD::FPOW,ISD::FPOWI,
502ISD::FCOS,ISD::FSIN,ISD::FSINCOS,ISD::FEXP,
503ISD::FEXP2,ISD::FEXP10,ISD::FLOG,ISD::FLOG2,
504ISD::FLOG10,ISD::FLDEXP,ISD::FFREXP},
505 MVT::f16,Promote);
506
507// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
508// complete support for all operations in LegalizeDAG.
509setOperationAction({ISD::STRICT_FCEIL,ISD::STRICT_FFLOOR,
510ISD::STRICT_FNEARBYINT,ISD::STRICT_FRINT,
511ISD::STRICT_FROUND,ISD::STRICT_FROUNDEVEN,
512ISD::STRICT_FTRUNC,ISD::STRICT_FLDEXP},
513 MVT::f16,Promote);
514
515// We need to custom promote this.
516if (Subtarget.is64Bit())
517setOperationAction(ISD::FPOWI, MVT::i32,Custom);
518 }
519
520if (Subtarget.hasStdExtFOrZfinx()) {
521setOperationAction(FPLegalNodeTypes, MVT::f32,Legal);
522setOperationAction(FPRndMode, MVT::f32,
523 Subtarget.hasStdExtZfa() ?Legal :Custom);
524setCondCodeAction(FPCCToExpand, MVT::f32,Expand);
525setOperationAction(ISD::SELECT_CC, MVT::f32,Expand);
526setOperationAction(ISD::SELECT, MVT::f32,Custom);
527setOperationAction(ISD::BR_CC, MVT::f32,Expand);
528setOperationAction(FPOpToExpand, MVT::f32,Expand);
529setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16,Expand);
530setTruncStoreAction(MVT::f32, MVT::f16,Expand);
531setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16,Expand);
532setTruncStoreAction(MVT::f32, MVT::bf16,Expand);
533setOperationAction(ISD::IS_FPCLASS, MVT::f32,Custom);
534setOperationAction(ISD::BF16_TO_FP, MVT::f32,Custom);
535setOperationAction(ISD::FP_TO_BF16, MVT::f32,
536 Subtarget.isSoftFPABI() ?LibCall :Custom);
537setOperationAction(ISD::FP_TO_FP16, MVT::f32,Custom);
538setOperationAction(ISD::FP16_TO_FP, MVT::f32,Custom);
539setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32,Custom);
540setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32,Custom);
541
542if (Subtarget.hasStdExtZfa()) {
543setOperationAction(ISD::ConstantFP, MVT::f32,Custom);
544setOperationAction(ISD::FNEARBYINT, MVT::f32,Legal);
545setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, MVT::f32,Legal);
546 }else {
547setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, MVT::f32,Custom);
548 }
549 }
550
551if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
552setOperationAction(ISD::BITCAST, MVT::i32,Custom);
553
554if (Subtarget.hasStdExtDOrZdinx()) {
555setOperationAction(FPLegalNodeTypes, MVT::f64,Legal);
556
557if (!Subtarget.is64Bit())
558setOperationAction(ISD::BITCAST, MVT::i64,Custom);
559
560if (Subtarget.hasStdExtZfa()) {
561setOperationAction(ISD::ConstantFP, MVT::f64,Custom);
562setOperationAction(FPRndMode, MVT::f64,Legal);
563setOperationAction(ISD::FNEARBYINT, MVT::f64,Legal);
564setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, MVT::f64,Legal);
565 }else {
566if (Subtarget.is64Bit())
567setOperationAction(FPRndMode, MVT::f64,Custom);
568
569setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, MVT::f64,Custom);
570 }
571
572setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32,Legal);
573setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64,Legal);
574setCondCodeAction(FPCCToExpand, MVT::f64,Expand);
575setOperationAction(ISD::SELECT_CC, MVT::f64,Expand);
576setOperationAction(ISD::SELECT, MVT::f64,Custom);
577setOperationAction(ISD::BR_CC, MVT::f64,Expand);
578setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32,Expand);
579setTruncStoreAction(MVT::f64, MVT::f32,Expand);
580setOperationAction(FPOpToExpand, MVT::f64,Expand);
581setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16,Expand);
582setTruncStoreAction(MVT::f64, MVT::f16,Expand);
583setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16,Expand);
584setTruncStoreAction(MVT::f64, MVT::bf16,Expand);
585setOperationAction(ISD::IS_FPCLASS, MVT::f64,Custom);
586setOperationAction(ISD::BF16_TO_FP, MVT::f64,Custom);
587setOperationAction(ISD::FP_TO_BF16, MVT::f64,
588 Subtarget.isSoftFPABI() ?LibCall :Custom);
589setOperationAction(ISD::FP_TO_FP16, MVT::f64,Custom);
590setOperationAction(ISD::FP16_TO_FP, MVT::f64,Expand);
591setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64,Custom);
592setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64,Expand);
593 }
594
595if (Subtarget.is64Bit()) {
596setOperationAction({ISD::FP_TO_UINT,ISD::FP_TO_SINT,
597ISD::STRICT_FP_TO_UINT,ISD::STRICT_FP_TO_SINT},
598 MVT::i32,Custom);
599setOperationAction(ISD::LROUND, MVT::i32,Custom);
600 }
601
602if (Subtarget.hasStdExtFOrZfinx()) {
603setOperationAction({ISD::FP_TO_UINT_SAT,ISD::FP_TO_SINT_SAT}, XLenVT,
604Custom);
605
606// f16/bf16 require custom handling.
607setOperationAction({ISD::STRICT_FP_TO_UINT,ISD::STRICT_FP_TO_SINT}, XLenVT,
608Custom);
609setOperationAction({ISD::STRICT_UINT_TO_FP,ISD::STRICT_SINT_TO_FP}, XLenVT,
610Custom);
611
612setOperationAction(ISD::GET_ROUNDING, XLenVT,Custom);
613setOperationAction(ISD::SET_ROUNDING, MVT::Other,Custom);
614 }
615
616setOperationAction({ISD::GlobalAddress,ISD::BlockAddress,ISD::ConstantPool,
617ISD::JumpTable},
618 XLenVT,Custom);
619
620setOperationAction(ISD::GlobalTLSAddress, XLenVT,Custom);
621
622if (Subtarget.is64Bit())
623setOperationAction(ISD::Constant, MVT::i64,Custom);
624
625// TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
626// Unfortunately this can't be determined just from the ISA naming string.
627setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
628 Subtarget.is64Bit() ?Legal :Custom);
629setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
630 Subtarget.is64Bit() ?Legal :Custom);
631
632if (Subtarget.is64Bit()) {
633setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other,Custom);
634setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other,Custom);
635 }
636
637setOperationAction({ISD::TRAP,ISD::DEBUGTRAP}, MVT::Other,Legal);
638setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other,Custom);
639if (Subtarget.is64Bit())
640setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32,Custom);
641
642if (Subtarget.hasStdExtZicbop()) {
643setOperationAction(ISD::PREFETCH, MVT::Other,Legal);
644 }
645
646if (Subtarget.hasStdExtA()) {
647setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
648if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649setMinCmpXchgSizeInBits(8);
650else
651setMinCmpXchgSizeInBits(32);
652 }elseif (Subtarget.hasForcedAtomics()) {
653setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
654 }else {
655setMaxAtomicSizeInBitsSupported(0);
656 }
657
658setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,Custom);
659
660setBooleanContents(ZeroOrOneBooleanContent);
661
662if (getTargetMachine().getTargetTriple().isOSLinux()) {
663// Custom lowering of llvm.clear_cache.
664setOperationAction(ISD::CLEAR_CACHE, MVT::Other,Custom);
665 }
666
667if (Subtarget.hasVInstructions()) {
668setBooleanVectorContents(ZeroOrOneBooleanContent);
669
670setOperationAction(ISD::VSCALE, XLenVT,Custom);
671
672// RVV intrinsics may have illegal operands.
673// We also need to custom legalize vmv.x.s.
674setOperationAction({ISD::INTRINSIC_WO_CHAIN,ISD::INTRINSIC_W_CHAIN,
675ISD::INTRINSIC_VOID},
676 {MVT::i8, MVT::i16},Custom);
677if (Subtarget.is64Bit())
678setOperationAction({ISD::INTRINSIC_W_CHAIN,ISD::INTRINSIC_VOID},
679 MVT::i32,Custom);
680else
681setOperationAction({ISD::INTRINSIC_WO_CHAIN,ISD::INTRINSIC_W_CHAIN},
682 MVT::i64,Custom);
683
684setOperationAction({ISD::INTRINSIC_W_CHAIN,ISD::INTRINSIC_VOID},
685 MVT::Other,Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
702 ISD::EXPERIMENTAL_VP_SPLAT};
703
704 static const unsigned FloatingPointVPOps[] = {
705 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
706 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
707 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
708 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
709 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
710 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
711 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
712 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
713 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
714 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
715 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
716 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
717 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
718 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
719
720 static const unsigned IntegerVecReduceOps[] = {
721ISD::VECREDUCE_ADD,ISD::VECREDUCE_AND,ISD::VECREDUCE_OR,
722ISD::VECREDUCE_XOR,ISD::VECREDUCE_SMAX,ISD::VECREDUCE_SMIN,
723ISD::VECREDUCE_UMAX,ISD::VECREDUCE_UMIN};
724
725 static const unsigned FloatingPointVecReduceOps[] = {
726ISD::VECREDUCE_FADD,ISD::VECREDUCE_SEQ_FADD,ISD::VECREDUCE_FMIN,
727ISD::VECREDUCE_FMAX,ISD::VECREDUCE_FMINIMUM,ISD::VECREDUCE_FMAXIMUM};
728
729 static const unsigned FloatingPointLibCallOps[] = {
730ISD::FREM,ISD::FPOW,ISD::FCOS,ISD::FSIN,ISD::FSINCOS,ISD::FEXP,
731ISD::FEXP2,ISD::FEXP10,ISD::FLOG,ISD::FLOG2,ISD::FLOG10};
732
733if (!Subtarget.is64Bit()) {
734// We must custom-lower certain vXi64 operations on RV32 due to the vector
735// element type being illegal.
736setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::EXTRACT_VECTOR_ELT},
737 MVT::i64,Custom);
738
739setOperationAction(IntegerVecReduceOps, MVT::i64,Custom);
740
741setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
742 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
743 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
744 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
745 MVT::i64,Custom);
746 }
747
748for (MVT VT : BoolVecVTs) {
749if (!isTypeLegal(VT))
750continue;
751
752setOperationAction(ISD::SPLAT_VECTOR, VT,Custom);
753
754// Mask VTs are custom-expanded into a series of standard nodes
755setOperationAction({ISD::TRUNCATE,ISD::CONCAT_VECTORS,
756ISD::INSERT_SUBVECTOR,ISD::EXTRACT_SUBVECTOR,
757ISD::SCALAR_TO_VECTOR},
758 VT,Custom);
759
760setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::EXTRACT_VECTOR_ELT}, VT,
761Custom);
762
763setOperationAction(ISD::SELECT, VT,Custom);
764setOperationAction({ISD::SELECT_CC,ISD::VSELECT, ISD::VP_SELECT}, VT,
765Expand);
766setOperationAction(ISD::VP_MERGE, VT,Custom);
767
768setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
769Custom);
770
771setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT,Custom);
772
773setOperationAction(
774 {ISD::VECREDUCE_AND,ISD::VECREDUCE_OR,ISD::VECREDUCE_XOR}, VT,
775Custom);
776
777setOperationAction(
778 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
779Custom);
780
781// RVV has native int->float & float->int conversions where the
782// element type sizes are within one power-of-two of each other. Any
783// wider distances between type sizes have to be lowered as sequences
784// which progressively narrow the gap in stages.
785setOperationAction({ISD::SINT_TO_FP,ISD::UINT_TO_FP,ISD::FP_TO_SINT,
786ISD::FP_TO_UINT,ISD::STRICT_SINT_TO_FP,
787ISD::STRICT_UINT_TO_FP,ISD::STRICT_FP_TO_SINT,
788ISD::STRICT_FP_TO_UINT},
789 VT,Custom);
790setOperationAction({ISD::FP_TO_SINT_SAT,ISD::FP_TO_UINT_SAT}, VT,
791Custom);
792
793// Expand all extending loads to types larger than this, and truncating
794// stores from types larger than this.
795for (MVT OtherVT :MVT::integer_scalable_vector_valuetypes()) {
796setTruncStoreAction(VT, OtherVT,Expand);
797setLoadExtAction({ISD::EXTLOAD,ISD::SEXTLOAD,ISD::ZEXTLOAD}, VT,
798 OtherVT,Expand);
799 }
800
801setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
802 ISD::VP_TRUNCATE, ISD::VP_SETCC},
803 VT,Custom);
804
805setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT,Custom);
806setOperationAction(ISD::VECTOR_INTERLEAVE, VT,Custom);
807
808setOperationAction(ISD::VECTOR_REVERSE, VT,Custom);
809
810setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT,Custom);
811setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT,Custom);
812
813setOperationPromotedToType(
814ISD::VECTOR_SPLICE, VT,
815MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
816 }
817
818for (MVT VT : IntVecVTs) {
819if (!isTypeLegal(VT))
820continue;
821
822setOperationAction(ISD::SPLAT_VECTOR, VT,Legal);
823setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT,Custom);
824
825// Vectors implement MULHS/MULHU.
826setOperationAction({ISD::SMUL_LOHI,ISD::UMUL_LOHI}, VT,Expand);
827
828// nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
829if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
830setOperationAction({ISD::MULHU,ISD::MULHS}, VT,Expand);
831
832setOperationAction({ISD::SMIN,ISD::SMAX,ISD::UMIN,ISD::UMAX}, VT,
833Legal);
834
835setOperationAction({ISD::ABDS,ISD::ABDU}, VT,Custom);
836
837// Custom-lower extensions and truncations from/to mask types.
838setOperationAction({ISD::ANY_EXTEND,ISD::SIGN_EXTEND,ISD::ZERO_EXTEND},
839 VT,Custom);
840
841// RVV has native int->float & float->int conversions where the
842// element type sizes are within one power-of-two of each other. Any
843// wider distances between type sizes have to be lowered as sequences
844// which progressively narrow the gap in stages.
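// Illustrative example (not exhaustive): converting nxv2i8 to nxv2f64 spans
// more than one power-of-two of element width, so it is emitted as a chain of
// conversions/extensions in which each step changes the element width by at
// most a factor of two.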
845setOperationAction({ISD::SINT_TO_FP,ISD::UINT_TO_FP,ISD::FP_TO_SINT,
846ISD::FP_TO_UINT,ISD::STRICT_SINT_TO_FP,
847ISD::STRICT_UINT_TO_FP,ISD::STRICT_FP_TO_SINT,
848ISD::STRICT_FP_TO_UINT},
849 VT,Custom);
850setOperationAction({ISD::FP_TO_SINT_SAT,ISD::FP_TO_UINT_SAT}, VT,
851Custom);
852setOperationAction({ISD::AVGFLOORS,ISD::AVGFLOORU,ISD::AVGCEILS,
853ISD::AVGCEILU,ISD::SADDSAT,ISD::UADDSAT,
854ISD::SSUBSAT,ISD::USUBSAT},
855 VT,Legal);
856
857// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
858// nodes which truncate by one power of two at a time.
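// Illustrative example: truncating nxv2i64 to nxv2i8 becomes the chain
// i64 -> i32 -> i16 -> i8, one RISCVISD::TRUNCATE_VECTOR_VL node per halving
// of the element width.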
859setOperationAction(
860 {ISD::TRUNCATE,ISD::TRUNCATE_SSAT_S,ISD::TRUNCATE_USAT_U}, VT,
861Custom);
862
863// Custom-lower insert/extract operations to simplify patterns.
864setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::EXTRACT_VECTOR_ELT}, VT,
865Custom);
866
867// Custom-lower reduction operations to set up the corresponding custom
868// nodes' operands.
869setOperationAction(IntegerVecReduceOps, VT,Custom);
870
871setOperationAction(IntegerVPOps, VT,Custom);
872
873setOperationAction({ISD::LOAD,ISD::STORE}, VT,Custom);
874
875setOperationAction({ISD::MLOAD,ISD::MSTORE,ISD::MGATHER,ISD::MSCATTER},
876 VT,Custom);
877
878setOperationAction(
879 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
880 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
881 VT,Custom);
882
883setOperationAction({ISD::CONCAT_VECTORS,ISD::INSERT_SUBVECTOR,
884ISD::EXTRACT_SUBVECTOR,ISD::SCALAR_TO_VECTOR},
885 VT,Custom);
886
887setOperationAction(ISD::SELECT, VT,Custom);
888setOperationAction(ISD::SELECT_CC, VT,Expand);
889
890setOperationAction({ISD::STEP_VECTOR,ISD::VECTOR_REVERSE}, VT,Custom);
891
892for (MVT OtherVT :MVT::integer_scalable_vector_valuetypes()) {
893setTruncStoreAction(VT, OtherVT,Expand);
894setLoadExtAction({ISD::EXTLOAD,ISD::SEXTLOAD,ISD::ZEXTLOAD}, VT,
895 OtherVT,Expand);
896 }
897
898setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT,Custom);
899setOperationAction(ISD::VECTOR_INTERLEAVE, VT,Custom);
900
901// Splice
902setOperationAction(ISD::VECTOR_SPLICE, VT,Custom);
903
904if (Subtarget.hasStdExtZvkb()) {
905setOperationAction(ISD::BSWAP, VT,Legal);
906setOperationAction(ISD::VP_BSWAP, VT,Custom);
907 }else {
908setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT,Expand);
909setOperationAction({ISD::ROTL,ISD::ROTR}, VT,Expand);
910 }
911
912if (Subtarget.hasStdExtZvbb()) {
913setOperationAction(ISD::BITREVERSE, VT,Legal);
914setOperationAction(ISD::VP_BITREVERSE, VT,Custom);
915setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
916 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
917 VT,Custom);
918 }else {
919setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT,Expand);
920setOperationAction({ISD::CTLZ,ISD::CTTZ,ISD::CTPOP}, VT,Expand);
921setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
922 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
923 VT,Expand);
924
925// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are
926// within the range of f32.
927EVT FloatVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
928if (isTypeLegal(FloatVT)) {
929setOperationAction({ISD::CTLZ,ISD::CTLZ_ZERO_UNDEF,
930ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
931 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
932 VT,Custom);
933 }
934 }
935
936setOperationAction(ISD::VECTOR_COMPRESS, VT,Custom);
937 }
938
939for (MVT VT : VecTupleVTs) {
940if (!isTypeLegal(VT))
941continue;
942
943setOperationAction({ISD::LOAD,ISD::STORE}, VT,Custom);
944 }
945
946// Expand various CCs to best match the RVV ISA, which natively supports UNE
947// but no other unordered comparisons, and supports all ordered comparisons
948// except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
949// purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
950// and we pattern-match those back to the "original", swapping operands once
951// more. This way we catch both operations and both "vf" and "fv" forms with
952// fewer patterns.
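// Illustrative example (assumed, for exposition): a (setogt x, y) node is
// expanded here to (setolt y, x); the isel patterns written for the LT/LE
// forms then match the swapped operands, so one set of patterns covers
// GT/GE/OGT/OGE as well as both the vector-scalar and scalar-vector orders.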
953 static const ISD::CondCode VFPCCToExpand[] = {
954ISD::SETO,ISD::SETONE,ISD::SETUEQ,ISD::SETUGT,
955ISD::SETUGE,ISD::SETULT,ISD::SETULE,ISD::SETUO,
956ISD::SETGT,ISD::SETOGT,ISD::SETGE,ISD::SETOGE,
957 };
958
959// TODO: support more ops.
960 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
961ISD::FMINNUM,ISD::FMAXNUM,ISD::FADD,ISD::FSUB,
962ISD::FMUL,ISD::FMA,ISD::FDIV,ISD::FSQRT,
963ISD::FCEIL,ISD::FTRUNC,ISD::FFLOOR,ISD::FROUND,
964ISD::FROUNDEVEN,ISD::FRINT,ISD::FNEARBYINT,ISD::IS_FPCLASS,
965ISD::SETCC,ISD::FMAXIMUM,ISD::FMINIMUM,ISD::STRICT_FADD,
966ISD::STRICT_FSUB,ISD::STRICT_FMUL,ISD::STRICT_FDIV,ISD::STRICT_FSQRT,
967ISD::STRICT_FMA};
968
969// TODO: support more vp ops.
970 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
971 ISD::VP_FADD,
972 ISD::VP_FSUB,
973 ISD::VP_FMUL,
974 ISD::VP_FDIV,
975 ISD::VP_FMA,
976 ISD::VP_REDUCE_FMIN,
977 ISD::VP_REDUCE_FMAX,
978 ISD::VP_SQRT,
979 ISD::VP_FMINNUM,
980 ISD::VP_FMAXNUM,
981 ISD::VP_FCEIL,
982 ISD::VP_FFLOOR,
983 ISD::VP_FROUND,
984 ISD::VP_FROUNDEVEN,
985 ISD::VP_FROUNDTOZERO,
986 ISD::VP_FRINT,
987 ISD::VP_FNEARBYINT,
988 ISD::VP_SETCC,
989 ISD::VP_FMINIMUM,
990 ISD::VP_FMAXIMUM,
991 ISD::VP_REDUCE_FMINIMUM,
992 ISD::VP_REDUCE_FMAXIMUM};
993
994// Sets common operation actions on RVV floating-point vector types.
995 const auto SetCommonVFPActions = [&](MVT VT) {
996setOperationAction(ISD::SPLAT_VECTOR, VT,Legal);
997// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
998// sizes are within one power-of-two of each other. Therefore conversions
999// between vXf16 and vXf64 must be lowered as sequences which convert via
1000// vXf32.
1001setOperationAction({ISD::FP_ROUND,ISD::FP_EXTEND}, VT,Custom);
1002setOperationAction({ISD::LRINT,ISD::LLRINT}, VT,Custom);
1003// Custom-lower insert/extract operations to simplify patterns.
1004setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::EXTRACT_VECTOR_ELT}, VT,
1005Custom);
1006// Expand various condition codes (explained above).
1007setCondCodeAction(VFPCCToExpand, VT,Expand);
1008
1009setOperationAction({ISD::FMINNUM,ISD::FMAXNUM}, VT,Legal);
1010setOperationAction({ISD::FMAXIMUM,ISD::FMINIMUM}, VT,Custom);
1011
1012setOperationAction({ISD::FTRUNC,ISD::FCEIL,ISD::FFLOOR,ISD::FROUND,
1013ISD::FROUNDEVEN,ISD::FRINT,ISD::FNEARBYINT,
1014ISD::IS_FPCLASS},
1015 VT,Custom);
1016
1017setOperationAction(FloatingPointVecReduceOps, VT,Custom);
1018
1019// Expand FP operations that need libcalls.
1020setOperationAction(FloatingPointLibCallOps, VT,Expand);
1021
1022setOperationAction(ISD::FCOPYSIGN, VT,Legal);
1023
1024setOperationAction({ISD::LOAD,ISD::STORE}, VT,Custom);
1025
1026setOperationAction({ISD::MLOAD,ISD::MSTORE,ISD::MGATHER,ISD::MSCATTER},
1027 VT,Custom);
1028
1029setOperationAction(
1030 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1031 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1032 VT,Custom);
1033
1034setOperationAction(ISD::SELECT, VT,Custom);
1035setOperationAction(ISD::SELECT_CC, VT,Expand);
1036
1037setOperationAction({ISD::CONCAT_VECTORS,ISD::INSERT_SUBVECTOR,
1038ISD::EXTRACT_SUBVECTOR,ISD::SCALAR_TO_VECTOR},
1039 VT,Custom);
1040
1041setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT,Custom);
1042setOperationAction(ISD::VECTOR_INTERLEAVE, VT,Custom);
1043
1044setOperationAction({ISD::VECTOR_REVERSE,ISD::VECTOR_SPLICE}, VT,Custom);
1045
1046setOperationAction(FloatingPointVPOps, VT,Custom);
1047
1048setOperationAction({ISD::STRICT_FP_EXTEND,ISD::STRICT_FP_ROUND}, VT,
1049Custom);
1050setOperationAction({ISD::STRICT_FADD,ISD::STRICT_FSUB,ISD::STRICT_FMUL,
1051ISD::STRICT_FDIV,ISD::STRICT_FSQRT,ISD::STRICT_FMA},
1052 VT,Legal);
1053setOperationAction({ISD::STRICT_FSETCC,ISD::STRICT_FSETCCS,
1054ISD::STRICT_FTRUNC,ISD::STRICT_FCEIL,
1055ISD::STRICT_FFLOOR,ISD::STRICT_FROUND,
1056ISD::STRICT_FROUNDEVEN,ISD::STRICT_FNEARBYINT},
1057 VT,Custom);
1058
1059setOperationAction(ISD::VECTOR_COMPRESS, VT,Custom);
1060 };
1061
1062// Sets common extload/truncstore actions on RVV floating-point vector
1063// types.
1064 const auto SetCommonVFPExtLoadTruncStoreActions =
1065     [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1066for (auto SmallVT : SmallerVTs) {
1067setTruncStoreAction(VT, SmallVT,Expand);
1068setLoadExtAction(ISD::EXTLOAD, VT, SmallVT,Expand);
1069 }
1070 };
1071
1072// Sets common actions for f16 and bf16 for when there's only
1073// zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1074 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075setOperationAction({ISD::FP_ROUND,ISD::FP_EXTEND}, VT,Custom);
1076setOperationAction({ISD::STRICT_FP_ROUND,ISD::STRICT_FP_EXTEND}, VT,
1077Custom);
1078setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT,Custom);
1079setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT,ISD::SELECT}, VT,
1080Custom);
1081setOperationAction(ISD::SELECT_CC, VT,Expand);
1082setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,Custom);
1083setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::CONCAT_VECTORS,
1084ISD::INSERT_SUBVECTOR,ISD::EXTRACT_SUBVECTOR,
1085ISD::VECTOR_DEINTERLEAVE,ISD::VECTOR_INTERLEAVE,
1086ISD::VECTOR_REVERSE,ISD::VECTOR_SPLICE,
1087ISD::VECTOR_COMPRESS},
1088 VT,Custom);
1089MVT EltVT = VT.getVectorElementType();
1090if (isTypeLegal(EltVT))
1091setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1092ISD::EXTRACT_VECTOR_ELT},
1093 VT,Custom);
1094else
1095setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1096 EltVT,Custom);
1097setOperationAction({ISD::LOAD,ISD::STORE,ISD::MLOAD,ISD::MSTORE,
1098ISD::MGATHER,ISD::MSCATTER, ISD::VP_LOAD,
1099 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1100 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1101 ISD::VP_SCATTER},
1102 VT,Custom);
1103
1104setOperationAction(ISD::FNEG, VT,Expand);
1105setOperationAction(ISD::FABS, VT,Expand);
1106setOperationAction(ISD::FCOPYSIGN, VT,Expand);
1107
1108// Expand FP operations that need libcalls.
1109setOperationAction(FloatingPointLibCallOps, VT,Expand);
1110
1111// Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
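// Illustrative note (assumed): nxv32f16/nxv32bf16 occupy LMUL=8, so promoting
// their elements to f32 would require nxv32f32 (LMUL=16), which does not
// exist; these types are therefore split instead of promoted.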
1112if (getLMUL(VT) ==RISCVII::VLMUL::LMUL_8) {
1113setOperationAction(ZvfhminZvfbfminPromoteOps, VT,Custom);
1114setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT,Custom);
1115 }else {
1116MVT F32VecVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1117setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1118setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1119 }
1120 };
1121
1122if (Subtarget.hasVInstructionsF16()) {
1123for (MVT VT : F16VecVTs) {
1124if (!isTypeLegal(VT))
1125continue;
1126 SetCommonVFPActions(VT);
1127 }
1128 }elseif (Subtarget.hasVInstructionsF16Minimal()) {
1129for (MVT VT : F16VecVTs) {
1130if (!isTypeLegal(VT))
1131continue;
1132 SetCommonPromoteToF32Actions(VT);
1133 }
1134 }
1135
1136if (Subtarget.hasVInstructionsBF16Minimal()) {
1137for (MVT VT : BF16VecVTs) {
1138if (!isTypeLegal(VT))
1139continue;
1140 SetCommonPromoteToF32Actions(VT);
1141 }
1142 }
1143
1144if (Subtarget.hasVInstructionsF32()) {
1145for (MVT VT : F32VecVTs) {
1146if (!isTypeLegal(VT))
1147continue;
1148 SetCommonVFPActions(VT);
1149 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1150 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1151 }
1152 }
1153
1154if (Subtarget.hasVInstructionsF64()) {
1155for (MVT VT : F64VecVTs) {
1156if (!isTypeLegal(VT))
1157continue;
1158 SetCommonVFPActions(VT);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1160 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1161 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1162 }
1163 }
1164
1165if (Subtarget.useRVVForFixedLengthVectors()) {
1166for (MVT VT :MVT::integer_fixedlen_vector_valuetypes()) {
1167if (!useRVVForFixedLengthVectorVT(VT))
1168continue;
1169
1170// By default everything must be expanded.
1171 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172setOperationAction(Op, VT,Expand);
1173for (MVT OtherVT :MVT::integer_fixedlen_vector_valuetypes()) {
1174setTruncStoreAction(VT, OtherVT,Expand);
1175setLoadExtAction({ISD::EXTLOAD,ISD::SEXTLOAD,ISD::ZEXTLOAD}, VT,
1176 OtherVT,Expand);
1177 }
1178
1179// Custom lower fixed vector undefs to scalable vector undefs to avoid
1180// expansion to a build_vector of 0s.
1181setOperationAction(ISD::UNDEF, VT,Custom);
1182
1183// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
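// Illustrative example (assuming a minimum VLEN of 128): a fixed v4i32 value
// lives in its scalable container nxv2i32; it is read out with
// EXTRACT_SUBVECTOR at index 0 and written back with INSERT_SUBVECTOR.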
1184setOperationAction({ISD::INSERT_SUBVECTOR,ISD::EXTRACT_SUBVECTOR}, VT,
1185Custom);
1186
1187setOperationAction(
1188 {ISD::BUILD_VECTOR,ISD::CONCAT_VECTORS,ISD::VECTOR_REVERSE}, VT,
1189Custom);
1190
1191setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::EXTRACT_VECTOR_ELT},
1192 VT,Custom);
1193
1194setOperationAction(ISD::SCALAR_TO_VECTOR, VT,Custom);
1195
1196setOperationAction({ISD::LOAD,ISD::STORE}, VT,Custom);
1197
1198setOperationAction(ISD::SETCC, VT,Custom);
1199
1200setOperationAction(ISD::SELECT, VT,Custom);
1201
1202setOperationAction(
1203 {ISD::TRUNCATE,ISD::TRUNCATE_SSAT_S,ISD::TRUNCATE_USAT_U}, VT,
1204Custom);
1205
1206setOperationAction(ISD::BITCAST, VT,Custom);
1207
1208setOperationAction(
1209 {ISD::VECREDUCE_AND,ISD::VECREDUCE_OR,ISD::VECREDUCE_XOR}, VT,
1210Custom);
1211
1212setOperationAction(
1213 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1214Custom);
1215
1216setOperationAction(
1217 {
1218ISD::SINT_TO_FP,
1219ISD::UINT_TO_FP,
1220ISD::FP_TO_SINT,
1221ISD::FP_TO_UINT,
1222ISD::STRICT_SINT_TO_FP,
1223ISD::STRICT_UINT_TO_FP,
1224ISD::STRICT_FP_TO_SINT,
1225ISD::STRICT_FP_TO_UINT,
1226 },
1227 VT,Custom);
1228setOperationAction({ISD::FP_TO_SINT_SAT,ISD::FP_TO_UINT_SAT}, VT,
1229Custom);
1230
1231setOperationAction(ISD::VECTOR_SHUFFLE, VT,Custom);
1232
1233// Operations below differ between mask vectors and other vectors.
1234if (VT.getVectorElementType() == MVT::i1) {
1235setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR,ISD::AND,
1236ISD::OR,ISD::XOR},
1237 VT,Custom);
1238
1239setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1240 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1241 VT,Custom);
1242
1243setOperationAction(ISD::VP_MERGE, VT,Custom);
1244
1245setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT,Custom);
1246setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT,Custom);
1247continue;
1248 }
1249
1250// Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1251// it before type legalization for i64 vectors on RV32. It will then be
1252// type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1253// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1254// improvements first.
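// Illustrative flow (assumed): on RV32, a fixed v2i64 splat is first combined
// into SPLAT_VECTOR; type legalization then splits the i64 scalar into two
// i32 halves as SPLAT_VECTOR_PARTS, which the Custom action below lowers.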
1255if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256setOperationAction(ISD::SPLAT_VECTOR, VT,Legal);
1257setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT,Custom);
1258 }
1259
1260setOperationAction(
1261 {ISD::MLOAD,ISD::MSTORE,ISD::MGATHER,ISD::MSCATTER}, VT,Custom);
1262
1263setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1264 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1265 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1266 ISD::VP_SCATTER},
1267 VT,Custom);
1268
1269setOperationAction({ISD::ADD,ISD::MUL,ISD::SUB,ISD::AND,ISD::OR,
1270ISD::XOR,ISD::SDIV,ISD::SREM,ISD::UDIV,
1271ISD::UREM,ISD::SHL,ISD::SRA,ISD::SRL},
1272 VT,Custom);
1273
1274setOperationAction(
1275 {ISD::SMIN,ISD::SMAX,ISD::UMIN,ISD::UMAX,ISD::ABS}, VT,Custom);
1276
1277setOperationAction({ISD::ABDS,ISD::ABDU}, VT,Custom);
1278
1279// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1280if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1281setOperationAction({ISD::MULHS,ISD::MULHU}, VT,Custom);
1282
1283setOperationAction({ISD::AVGFLOORS,ISD::AVGFLOORU,ISD::AVGCEILS,
1284ISD::AVGCEILU,ISD::SADDSAT,ISD::UADDSAT,
1285ISD::SSUBSAT,ISD::USUBSAT},
1286 VT,Custom);
1287
1288setOperationAction(ISD::VSELECT, VT,Custom);
1289
1290setOperationAction(
1291 {ISD::ANY_EXTEND,ISD::SIGN_EXTEND,ISD::ZERO_EXTEND}, VT,Custom);
1292
1293// Custom-lower reduction operations to set up the corresponding custom
1294// nodes' operands.
1295setOperationAction({ISD::VECREDUCE_ADD,ISD::VECREDUCE_SMAX,
1296ISD::VECREDUCE_SMIN,ISD::VECREDUCE_UMAX,
1297ISD::VECREDUCE_UMIN},
1298 VT,Custom);
1299
1300setOperationAction(IntegerVPOps, VT,Custom);
1301
1302if (Subtarget.hasStdExtZvkb())
1303setOperationAction({ISD::BSWAP,ISD::ROTL,ISD::ROTR}, VT,Custom);
1304
1305if (Subtarget.hasStdExtZvbb()) {
1306setOperationAction({ISD::BITREVERSE,ISD::CTLZ,ISD::CTLZ_ZERO_UNDEF,
1307ISD::CTTZ,ISD::CTTZ_ZERO_UNDEF,ISD::CTPOP},
1308 VT,Custom);
1309 }else {
1310// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are
1311// within the range of f32.
1312EVT FloatVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313if (isTypeLegal(FloatVT))
1314setOperationAction(
1315 {ISD::CTLZ,ISD::CTLZ_ZERO_UNDEF,ISD::CTTZ_ZERO_UNDEF}, VT,
1316Custom);
1317 }
1318
1319setOperationAction(ISD::VECTOR_COMPRESS, VT,Custom);
1320 }
1321
1322for (MVT VT :MVT::fp_fixedlen_vector_valuetypes()) {
1323// There are no extending loads or truncating stores.
1324for (MVT InnerVT :MVT::fp_fixedlen_vector_valuetypes()) {
1325setLoadExtAction(ISD::EXTLOAD, VT, InnerVT,Expand);
1326setTruncStoreAction(VT, InnerVT,Expand);
1327 }
1328
1329if (!useRVVForFixedLengthVectorVT(VT))
1330continue;
1331
1332// By default everything must be expanded.
1333 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1334setOperationAction(Op, VT,Expand);
1335
1336// Custom lower fixed vector undefs to scalable vector undefs to avoid
1337// expansion to a build_vector of 0s.
1338setOperationAction(ISD::UNDEF, VT,Custom);
1339
1340setOperationAction({ISD::INSERT_VECTOR_ELT,ISD::EXTRACT_VECTOR_ELT,
1341ISD::CONCAT_VECTORS,ISD::INSERT_SUBVECTOR,
1342ISD::EXTRACT_SUBVECTOR,ISD::VECTOR_REVERSE,
1343ISD::VECTOR_SHUFFLE,ISD::VECTOR_COMPRESS},
1344 VT,Custom);
1345
1346setOperationAction({ISD::LOAD,ISD::STORE,ISD::MLOAD,ISD::MSTORE,
1347ISD::MGATHER,ISD::MSCATTER},
1348 VT,Custom);
1349setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1350 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1352 VT,Custom);
1353
1354setOperationAction({ISD::FP_ROUND,ISD::FP_EXTEND}, VT,Custom);
1355setOperationAction({ISD::STRICT_FP_ROUND,ISD::STRICT_FP_EXTEND}, VT,
1356Custom);
1357
1358if (VT.getVectorElementType() == MVT::f16 &&
1359 !Subtarget.hasVInstructionsF16()) {
1360setOperationAction(ISD::BITCAST, VT,Custom);
1361setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT,Custom);
1362setOperationAction(
1363 {ISD::VP_MERGE, ISD::VP_SELECT,ISD::VSELECT,ISD::SELECT}, VT,
1364Custom);
1365setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1366Custom);
1367if (Subtarget.hasStdExtZfhmin()) {
1368setOperationAction(ISD::BUILD_VECTOR, VT,Custom);
1369 }else {
1370// We need to custom legalize f16 build vectors if Zfhmin isn't
1371// available.
1372setOperationAction(ISD::BUILD_VECTOR, MVT::f16,Custom);
1373 }
1374setOperationAction(ISD::FNEG, VT,Expand);
1375setOperationAction(ISD::FABS, VT,Expand);
1376setOperationAction(ISD::FCOPYSIGN, VT,Expand);
1377MVT F32VecVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1378// Don't promote f16 vector operations to f32 if f32 vector type is
1379// not legal.
1380// TODO: could split the f16 vector into two vectors and do promotion.
1381if (!isTypeLegal(F32VecVT))
1382continue;
1383setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1384setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1385continue;
1386 }
1387
1388if (VT.getVectorElementType() == MVT::bf16) {
1389setOperationAction(ISD::BITCAST, VT,Custom);
1390setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT,Custom);
1391if (Subtarget.hasStdExtZfbfmin()) {
1392setOperationAction(ISD::BUILD_VECTOR, VT,Custom);
1393 }else {
1394// We need to custom legalize bf16 build vectors if Zfbfmin isn't
1395// available.
1396setOperationAction(ISD::BUILD_VECTOR, MVT::bf16,Custom);
1397 }
1398setOperationAction(
1399 {ISD::VP_MERGE, ISD::VP_SELECT,ISD::VSELECT,ISD::SELECT}, VT,
1400Custom);
1401MVT F32VecVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1402// Don't promote bf16 vector operations to f32 if the f32 vector type is
1403// not legal.
1404// TODO: could split the bf16 vector into two vectors and do promotion.
1405if (!isTypeLegal(F32VecVT))
1406continue;
1407setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1408// TODO: Promote VP ops to fp32.
1409continue;
1410 }
1411
1412setOperationAction({ISD::BUILD_VECTOR,ISD::SCALAR_TO_VECTOR}, VT,
1413Custom);
1414
1415setOperationAction({ISD::FADD,ISD::FSUB,ISD::FMUL,ISD::FDIV,
1416ISD::FNEG,ISD::FABS,ISD::FCOPYSIGN,ISD::FSQRT,
1417ISD::FMA,ISD::FMINNUM,ISD::FMAXNUM,
1418ISD::IS_FPCLASS,ISD::FMAXIMUM,ISD::FMINIMUM},
1419 VT,Custom);
1420
1421setOperationAction({ISD::FTRUNC,ISD::FCEIL,ISD::FFLOOR,ISD::FROUND,
1422ISD::FROUNDEVEN,ISD::FRINT,ISD::FNEARBYINT},
1423 VT,Custom);
1424
1425setCondCodeAction(VFPCCToExpand, VT,Expand);
1426
1427setOperationAction(ISD::SETCC, VT,Custom);
1428setOperationAction({ISD::VSELECT,ISD::SELECT}, VT,Custom);
1429
1430setOperationAction(ISD::BITCAST, VT,Custom);
1431
1432setOperationAction(FloatingPointVecReduceOps, VT,Custom);
1433
1434setOperationAction(FloatingPointVPOps, VT,Custom);
1435
1436setOperationAction(
1437 {ISD::STRICT_FADD,ISD::STRICT_FSUB,ISD::STRICT_FMUL,
1438ISD::STRICT_FDIV,ISD::STRICT_FSQRT,ISD::STRICT_FMA,
1439ISD::STRICT_FSETCC,ISD::STRICT_FSETCCS,ISD::STRICT_FTRUNC,
1440ISD::STRICT_FCEIL,ISD::STRICT_FFLOOR,ISD::STRICT_FROUND,
1441ISD::STRICT_FROUNDEVEN,ISD::STRICT_FNEARBYINT},
1442 VT,Custom);
1443 }
1444
1445// Custom-legalize bitcasts from fixed-length vectors to scalar types.
1446setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32},Custom);
1447if (Subtarget.is64Bit())
1448setOperationAction(ISD::BITCAST, MVT::i64,Custom);
1449if (Subtarget.hasStdExtZfhminOrZhinxmin())
1450setOperationAction(ISD::BITCAST, MVT::f16,Custom);
1451if (Subtarget.hasStdExtZfbfmin())
1452setOperationAction(ISD::BITCAST, MVT::bf16,Custom);
1453if (Subtarget.hasStdExtFOrZfinx())
1454setOperationAction(ISD::BITCAST, MVT::f32,Custom);
1455if (Subtarget.hasStdExtDOrZdinx())
1456setOperationAction(ISD::BITCAST, MVT::f64,Custom);
1457 }
1458 }
1459
1460if (Subtarget.hasStdExtA())
1461setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT,Expand);
1462
1463if (Subtarget.hasForcedAtomics()) {
1464// Force __sync libcalls to be emitted for atomic rmw/cas operations.
1465setOperationAction(
1466 {ISD::ATOMIC_CMP_SWAP,ISD::ATOMIC_SWAP,ISD::ATOMIC_LOAD_ADD,
1467ISD::ATOMIC_LOAD_SUB,ISD::ATOMIC_LOAD_AND,ISD::ATOMIC_LOAD_OR,
1468ISD::ATOMIC_LOAD_XOR,ISD::ATOMIC_LOAD_NAND,ISD::ATOMIC_LOAD_MIN,
1469ISD::ATOMIC_LOAD_MAX,ISD::ATOMIC_LOAD_UMIN,ISD::ATOMIC_LOAD_UMAX},
1470 XLenVT,LibCall);
1471 }
1472
1473if (Subtarget.hasVendorXTHeadMemIdx()) {
1474for (unsignedim : {ISD::PRE_INC,ISD::POST_INC}) {
1475setIndexedLoadAction(im, MVT::i8,Legal);
1476setIndexedStoreAction(im, MVT::i8,Legal);
1477setIndexedLoadAction(im, MVT::i16,Legal);
1478setIndexedStoreAction(im, MVT::i16,Legal);
1479setIndexedLoadAction(im, MVT::i32,Legal);
1480setIndexedStoreAction(im, MVT::i32,Legal);
1481
1482if (Subtarget.is64Bit()) {
1483setIndexedLoadAction(im, MVT::i64,Legal);
1484setIndexedStoreAction(im, MVT::i64,Legal);
1485 }
1486 }
1487 }
1488
1489if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1490setIndexedLoadAction(ISD::POST_INC, MVT::i8,Legal);
1491setIndexedLoadAction(ISD::POST_INC, MVT::i16,Legal);
1492setIndexedLoadAction(ISD::POST_INC, MVT::i32,Legal);
1493
1494setIndexedStoreAction(ISD::POST_INC, MVT::i8,Legal);
1495setIndexedStoreAction(ISD::POST_INC, MVT::i16,Legal);
1496setIndexedStoreAction(ISD::POST_INC, MVT::i32,Legal);
1497 }
1498
1499// Function alignments.
1500constAlign FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1501setMinFunctionAlignment(FunctionAlignment);
1502// Set preferred alignments.
1503setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1504setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1505
1506setTargetDAGCombine({ISD::INTRINSIC_VOID,ISD::INTRINSIC_W_CHAIN,
1507ISD::INTRINSIC_WO_CHAIN,ISD::ADD,ISD::SUB,ISD::MUL,
1508ISD::AND,ISD::OR,ISD::XOR,ISD::SETCC,ISD::SELECT});
1509setTargetDAGCombine(ISD::SRA);
1510setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1511
1512if (Subtarget.hasStdExtFOrZfinx())
1513setTargetDAGCombine({ISD::FADD,ISD::FMAXNUM,ISD::FMINNUM,ISD::FMUL});
1514
1515if (Subtarget.hasStdExtZbb())
1516setTargetDAGCombine({ISD::UMAX,ISD::UMIN,ISD::SMAX,ISD::SMIN});
1517
1518if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1519 Subtarget.hasVInstructions())
1520setTargetDAGCombine(ISD::TRUNCATE);
1521
1522if (Subtarget.hasStdExtZbkb())
1523setTargetDAGCombine(ISD::BITREVERSE);
1524
1525if (Subtarget.hasStdExtFOrZfinx())
1526setTargetDAGCombine({ISD::ZERO_EXTEND,ISD::FP_TO_SINT,ISD::FP_TO_UINT,
1527ISD::FP_TO_SINT_SAT,ISD::FP_TO_UINT_SAT});
1528if (Subtarget.hasVInstructions())
1529setTargetDAGCombine({ISD::FCOPYSIGN,ISD::MGATHER,
1530ISD::MSCATTER, ISD::VP_GATHER,
1531 ISD::VP_SCATTER,ISD::SRA,
1532ISD::SRL,ISD::SHL,
1533ISD::STORE,ISD::SPLAT_VECTOR,
1534ISD::BUILD_VECTOR,ISD::CONCAT_VECTORS,
1535 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,
1536ISD::MUL,ISD::SDIV,
1537ISD::UDIV,ISD::SREM,
1538ISD::UREM,ISD::INSERT_VECTOR_ELT,
1539ISD::ABS,ISD::CTPOP,
1540ISD::VECTOR_SHUFFLE,ISD::VSELECT});
1541
1542if (Subtarget.hasVendorXTHeadMemPair())
1543setTargetDAGCombine({ISD::LOAD,ISD::STORE});
1544if (Subtarget.useRVVForFixedLengthVectors())
1545setTargetDAGCombine(ISD::BITCAST);
1546
1547setLibcallName(RTLIB::FPEXT_F16_F32,"__extendhfsf2");
1548setLibcallName(RTLIB::FPROUND_F32_F16,"__truncsfhf2");
1549
1550// Disable strict node mutation.
1551IsStrictFPEnabled =true;
1552EnableExtLdPromotion =true;
1553
1554// Let the subtarget decide if a predictable select is more expensive than the
1555// corresponding branch. This information is used in CGP/SelectOpt to decide
1556// when to convert selects into branches.
1557PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1558
1559MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1560MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1561
1562MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1563MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1564MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1565
1566MaxStoresPerMemmoveOptSize =
1567 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1568MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1569
1570MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1571MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1572}
1573
1574EVTRISCVTargetLowering::getSetCCResultType(constDataLayout &DL,
1575LLVMContext &Context,
1576EVT VT) const{
1577if (!VT.isVector())
1578returngetPointerTy(DL);
1579if (Subtarget.hasVInstructions() &&
1580 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1581returnEVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1582return VT.changeVectorElementTypeToInteger();
1583}
1584
1585MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const{
1586return Subtarget.getXLenVT();
1587}
1588
1589// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1590bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1591unsigned VF,
1592bool IsScalable) const{
1593if (!Subtarget.hasVInstructions())
1594returntrue;
1595
1596if (!IsScalable)
1597returntrue;
1598
1599if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1600returntrue;
1601
1602// Don't allow VF=1 if those types aren't legal.
1603if (VF <RISCV::RVVBitsPerBlock / Subtarget.getELen())
1604returntrue;
1605
1606// VLEN=32 support is incomplete.
1607if (Subtarget.getRealMinVLen() <RISCV::RVVBitsPerBlock)
1608returntrue;
1609
1610// The maximum VF is for the smallest element width with LMUL=8.
1611// VF must be a power of 2.
1612unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1613return VF > MaxVF || !isPowerOf2_32(VF);
1614}
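// With RVVBitsPerBlock = 64, MaxVF above evaluates to (64 / 8) * 8 = 64, so
// e.g. a power-of-2 VF of 32 with an i32 or XLEN-sized trip count can use the
// vsetvli-based lowering, while VF = 128 or any non-power-of-2 VF is expanded
// generically.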
1615
1616boolRISCVTargetLowering::shouldExpandCttzElements(EVT VT) const{
1617return !Subtarget.hasVInstructions() ||
1618 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1619}
1620
1621boolRISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1622constCallInst &I,
1623MachineFunction &MF,
1624unsigned Intrinsic) const{
1625auto &DL =I.getDataLayout();
1626
1627auto SetRVVLoadStoreInfo = [&](unsigned PtrOp,bool IsStore,
1628bool IsUnitStrided,bool UsePtrVal =false) {
1629Info.opc = IsStore ?ISD::INTRINSIC_VOID :ISD::INTRINSIC_W_CHAIN;
1630// We can't use ptrVal if the intrinsic can access memory before the
1631// pointer. This means we can't use it for strided or indexed intrinsics.
1632if (UsePtrVal)
1633Info.ptrVal =I.getArgOperand(PtrOp);
1634else
1635Info.fallbackAddressSpace =
1636I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1637Type *MemTy;
1638if (IsStore) {
1639// Store value is the first operand.
1640 MemTy =I.getArgOperand(0)->getType();
1641 }else {
1642// Use the return type. If it's a segment load, the return type is a struct.
1643 MemTy =I.getType();
1644if (MemTy->isStructTy())
1645 MemTy = MemTy->getStructElementType(0);
1646 }
1647if (!IsUnitStrided)
1648 MemTy = MemTy->getScalarType();
1649
1650Info.memVT =getValueType(DL, MemTy);
1651if (MemTy->isTargetExtTy()) {
1652// RISC-V vector tuple type's alignment type should be its element type.
1653if (cast<TargetExtType>(MemTy)->getName() =="riscv.vector.tuple")
1654 MemTy =Type::getIntNTy(
1655 MemTy->getContext(),
1656 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1657 ->getZExtValue());
1658Info.align =DL.getABITypeAlign(MemTy);
1659 }else {
1660Info.align =Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1661 }
1662Info.size =MemoryLocation::UnknownSize;
1663Info.flags |=
1664 IsStore ?MachineMemOperand::MOStore :MachineMemOperand::MOLoad;
1665returntrue;
1666 };
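// Note how the lambda above reports memVT: unit-strided accesses (e.g.
// riscv_vle/vse) describe the whole vector type, while strided and indexed
// forms only describe the scalar element type, since the touched bytes are not
// contiguous. ptrVal is likewise only recorded when the access cannot start
// before the pointer.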
1667
1668if (I.hasMetadata(LLVMContext::MD_nontemporal))
1669Info.flags |=MachineMemOperand::MONonTemporal;
1670
1671Info.flags |=RISCVTargetLowering::getTargetMMOFlags(I);
1672switch (Intrinsic) {
1673default:
1674returnfalse;
1675case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1676case Intrinsic::riscv_masked_atomicrmw_add_i32:
1677case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1678case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1679case Intrinsic::riscv_masked_atomicrmw_max_i32:
1680case Intrinsic::riscv_masked_atomicrmw_min_i32:
1681case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1682case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1683case Intrinsic::riscv_masked_cmpxchg_i32:
1684Info.opc =ISD::INTRINSIC_W_CHAIN;
1685Info.memVT = MVT::i32;
1686Info.ptrVal =I.getArgOperand(0);
1687Info.offset = 0;
1688Info.align =Align(4);
1689Info.flags =MachineMemOperand::MOLoad |MachineMemOperand::MOStore |
1690MachineMemOperand::MOVolatile;
1691returntrue;
1692case Intrinsic::riscv_seg2_load:
1693case Intrinsic::riscv_seg3_load:
1694case Intrinsic::riscv_seg4_load:
1695case Intrinsic::riscv_seg5_load:
1696case Intrinsic::riscv_seg6_load:
1697case Intrinsic::riscv_seg7_load:
1698case Intrinsic::riscv_seg8_load:
1699return SetRVVLoadStoreInfo(/*PtrOp*/ 0,/*IsStore*/false,
1700/*IsUnitStrided*/false,/*UsePtrVal*/true);
1701case Intrinsic::riscv_seg2_store:
1702case Intrinsic::riscv_seg3_store:
1703case Intrinsic::riscv_seg4_store:
1704case Intrinsic::riscv_seg5_store:
1705case Intrinsic::riscv_seg6_store:
1706case Intrinsic::riscv_seg7_store:
1707case Intrinsic::riscv_seg8_store:
1708// Operands are (vec, ..., vec, ptr, vl)
1709return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 2,
1710/*IsStore*/true,
1711/*IsUnitStrided*/false,/*UsePtrVal*/true);
1712case Intrinsic::riscv_vle:
1713case Intrinsic::riscv_vle_mask:
1714case Intrinsic::riscv_vleff:
1715case Intrinsic::riscv_vleff_mask:
1716return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1717/*IsStore*/false,
1718/*IsUnitStrided*/true,
1719/*UsePtrVal*/true);
1720case Intrinsic::riscv_vse:
1721case Intrinsic::riscv_vse_mask:
1722return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1723/*IsStore*/true,
1724/*IsUnitStrided*/true,
1725/*UsePtrVal*/true);
1726case Intrinsic::riscv_vlse:
1727case Intrinsic::riscv_vlse_mask:
1728case Intrinsic::riscv_vloxei:
1729case Intrinsic::riscv_vloxei_mask:
1730case Intrinsic::riscv_vluxei:
1731case Intrinsic::riscv_vluxei_mask:
1732return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1733/*IsStore*/false,
1734/*IsUnitStrided*/false);
1735case Intrinsic::riscv_vsse:
1736case Intrinsic::riscv_vsse_mask:
1737case Intrinsic::riscv_vsoxei:
1738case Intrinsic::riscv_vsoxei_mask:
1739case Intrinsic::riscv_vsuxei:
1740case Intrinsic::riscv_vsuxei_mask:
1741return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1742/*IsStore*/true,
1743/*IsUnitStrided*/false);
1744case Intrinsic::riscv_vlseg2:
1745case Intrinsic::riscv_vlseg3:
1746case Intrinsic::riscv_vlseg4:
1747case Intrinsic::riscv_vlseg5:
1748case Intrinsic::riscv_vlseg6:
1749case Intrinsic::riscv_vlseg7:
1750case Intrinsic::riscv_vlseg8:
1751case Intrinsic::riscv_vlseg2ff:
1752case Intrinsic::riscv_vlseg3ff:
1753case Intrinsic::riscv_vlseg4ff:
1754case Intrinsic::riscv_vlseg5ff:
1755case Intrinsic::riscv_vlseg6ff:
1756case Intrinsic::riscv_vlseg7ff:
1757case Intrinsic::riscv_vlseg8ff:
1758return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 3,
1759/*IsStore*/false,
1760/*IsUnitStrided*/false,/*UsePtrVal*/true);
1761case Intrinsic::riscv_vlseg2_mask:
1762case Intrinsic::riscv_vlseg3_mask:
1763case Intrinsic::riscv_vlseg4_mask:
1764case Intrinsic::riscv_vlseg5_mask:
1765case Intrinsic::riscv_vlseg6_mask:
1766case Intrinsic::riscv_vlseg7_mask:
1767case Intrinsic::riscv_vlseg8_mask:
1768case Intrinsic::riscv_vlseg2ff_mask:
1769case Intrinsic::riscv_vlseg3ff_mask:
1770case Intrinsic::riscv_vlseg4ff_mask:
1771case Intrinsic::riscv_vlseg5ff_mask:
1772case Intrinsic::riscv_vlseg6ff_mask:
1773case Intrinsic::riscv_vlseg7ff_mask:
1774case Intrinsic::riscv_vlseg8ff_mask:
1775return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 5,
1776/*IsStore*/false,
1777/*IsUnitStrided*/false,/*UsePtrVal*/true);
1778case Intrinsic::riscv_vlsseg2:
1779case Intrinsic::riscv_vlsseg3:
1780case Intrinsic::riscv_vlsseg4:
1781case Intrinsic::riscv_vlsseg5:
1782case Intrinsic::riscv_vlsseg6:
1783case Intrinsic::riscv_vlsseg7:
1784case Intrinsic::riscv_vlsseg8:
1785case Intrinsic::riscv_vloxseg2:
1786case Intrinsic::riscv_vloxseg3:
1787case Intrinsic::riscv_vloxseg4:
1788case Intrinsic::riscv_vloxseg5:
1789case Intrinsic::riscv_vloxseg6:
1790case Intrinsic::riscv_vloxseg7:
1791case Intrinsic::riscv_vloxseg8:
1792case Intrinsic::riscv_vluxseg2:
1793case Intrinsic::riscv_vluxseg3:
1794case Intrinsic::riscv_vluxseg4:
1795case Intrinsic::riscv_vluxseg5:
1796case Intrinsic::riscv_vluxseg6:
1797case Intrinsic::riscv_vluxseg7:
1798case Intrinsic::riscv_vluxseg8:
1799return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 4,
1800/*IsStore*/false,
1801/*IsUnitStrided*/false);
1802case Intrinsic::riscv_vlsseg2_mask:
1803case Intrinsic::riscv_vlsseg3_mask:
1804case Intrinsic::riscv_vlsseg4_mask:
1805case Intrinsic::riscv_vlsseg5_mask:
1806case Intrinsic::riscv_vlsseg6_mask:
1807case Intrinsic::riscv_vlsseg7_mask:
1808case Intrinsic::riscv_vlsseg8_mask:
1809case Intrinsic::riscv_vloxseg2_mask:
1810case Intrinsic::riscv_vloxseg3_mask:
1811case Intrinsic::riscv_vloxseg4_mask:
1812case Intrinsic::riscv_vloxseg5_mask:
1813case Intrinsic::riscv_vloxseg6_mask:
1814case Intrinsic::riscv_vloxseg7_mask:
1815case Intrinsic::riscv_vloxseg8_mask:
1816case Intrinsic::riscv_vluxseg2_mask:
1817case Intrinsic::riscv_vluxseg3_mask:
1818case Intrinsic::riscv_vluxseg4_mask:
1819case Intrinsic::riscv_vluxseg5_mask:
1820case Intrinsic::riscv_vluxseg6_mask:
1821case Intrinsic::riscv_vluxseg7_mask:
1822case Intrinsic::riscv_vluxseg8_mask:
1823return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 6,
1824/*IsStore*/false,
1825/*IsUnitStrided*/false);
1826case Intrinsic::riscv_vsseg2:
1827case Intrinsic::riscv_vsseg3:
1828case Intrinsic::riscv_vsseg4:
1829case Intrinsic::riscv_vsseg5:
1830case Intrinsic::riscv_vsseg6:
1831case Intrinsic::riscv_vsseg7:
1832case Intrinsic::riscv_vsseg8:
1833return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 3,
1834/*IsStore*/true,
1835/*IsUnitStrided*/false);
1836case Intrinsic::riscv_vsseg2_mask:
1837case Intrinsic::riscv_vsseg3_mask:
1838case Intrinsic::riscv_vsseg4_mask:
1839case Intrinsic::riscv_vsseg5_mask:
1840case Intrinsic::riscv_vsseg6_mask:
1841case Intrinsic::riscv_vsseg7_mask:
1842case Intrinsic::riscv_vsseg8_mask:
1843return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 4,
1844/*IsStore*/true,
1845/*IsUnitStrided*/false);
1846case Intrinsic::riscv_vssseg2:
1847case Intrinsic::riscv_vssseg3:
1848case Intrinsic::riscv_vssseg4:
1849case Intrinsic::riscv_vssseg5:
1850case Intrinsic::riscv_vssseg6:
1851case Intrinsic::riscv_vssseg7:
1852case Intrinsic::riscv_vssseg8:
1853case Intrinsic::riscv_vsoxseg2:
1854case Intrinsic::riscv_vsoxseg3:
1855case Intrinsic::riscv_vsoxseg4:
1856case Intrinsic::riscv_vsoxseg5:
1857case Intrinsic::riscv_vsoxseg6:
1858case Intrinsic::riscv_vsoxseg7:
1859case Intrinsic::riscv_vsoxseg8:
1860case Intrinsic::riscv_vsuxseg2:
1861case Intrinsic::riscv_vsuxseg3:
1862case Intrinsic::riscv_vsuxseg4:
1863case Intrinsic::riscv_vsuxseg5:
1864case Intrinsic::riscv_vsuxseg6:
1865case Intrinsic::riscv_vsuxseg7:
1866case Intrinsic::riscv_vsuxseg8:
1867return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 4,
1868/*IsStore*/true,
1869/*IsUnitStrided*/false);
1870case Intrinsic::riscv_vssseg2_mask:
1871case Intrinsic::riscv_vssseg3_mask:
1872case Intrinsic::riscv_vssseg4_mask:
1873case Intrinsic::riscv_vssseg5_mask:
1874case Intrinsic::riscv_vssseg6_mask:
1875case Intrinsic::riscv_vssseg7_mask:
1876case Intrinsic::riscv_vssseg8_mask:
1877case Intrinsic::riscv_vsoxseg2_mask:
1878case Intrinsic::riscv_vsoxseg3_mask:
1879case Intrinsic::riscv_vsoxseg4_mask:
1880case Intrinsic::riscv_vsoxseg5_mask:
1881case Intrinsic::riscv_vsoxseg6_mask:
1882case Intrinsic::riscv_vsoxseg7_mask:
1883case Intrinsic::riscv_vsoxseg8_mask:
1884case Intrinsic::riscv_vsuxseg2_mask:
1885case Intrinsic::riscv_vsuxseg3_mask:
1886case Intrinsic::riscv_vsuxseg4_mask:
1887case Intrinsic::riscv_vsuxseg5_mask:
1888case Intrinsic::riscv_vsuxseg6_mask:
1889case Intrinsic::riscv_vsuxseg7_mask:
1890case Intrinsic::riscv_vsuxseg8_mask:
1891return SetRVVLoadStoreInfo(/*PtrOp*/I.arg_size() - 5,
1892/*IsStore*/true,
1893/*IsUnitStrided*/false);
1894 }
1895}
1896
1897boolRISCVTargetLowering::isLegalAddressingMode(constDataLayout &DL,
1898constAddrMode &AM,Type *Ty,
1899unsigned AS,
1900Instruction *I) const{
1901// No global is ever allowed as a base.
1902if (AM.BaseGV)
1903returnfalse;
1904
1905// None of our addressing modes allows a scalable offset
1906if (AM.ScalableOffset)
1907returnfalse;
1908
1909// RVV instructions only support register addressing.
1910if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1911return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1912
1913// Require a 12-bit signed offset.
1914if (!isInt<12>(AM.BaseOffs))
1915returnfalse;
1916
1917switch (AM.Scale) {
1918case 0:// "r+i" or just "i", depending on HasBaseReg.
1919break;
1920case 1:
1921if (!AM.HasBaseReg)// allow "r+i".
1922break;
1923returnfalse;// disallow "r+r" or "r+r+i".
1924default:
1925returnfalse;
1926 }
1927
1928returntrue;
1929}
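// In practice this accepts "reg", "reg + simm12" and a bare "simm12": an
// offset of 2047 folds into the load/store while 2048 needs a separate ADD,
// and "reg + reg" forms are never claimed as legal here.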
1930
1931boolRISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const{
1932return isInt<12>(Imm);
1933}
1934
1935boolRISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const{
1936return isInt<12>(Imm);
1937}
1938
1939// On RV32, 64-bit integers are split into their high and low parts and held
1940// in two different registers, so the trunc is free since the low register can
1941// just be used.
1942// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1943// isTruncateFree?
1944boolRISCVTargetLowering::isTruncateFree(Type *SrcTy,Type *DstTy) const{
1945if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1946returnfalse;
1947unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1948unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1949return (SrcBits == 64 && DestBits == 32);
1950}
1951
1952boolRISCVTargetLowering::isTruncateFree(EVT SrcVT,EVT DstVT) const{
1953// We consider i64->i32 free on RV64 since we have good selection of W
1954// instructions that make promoting operations back to i64 free in many cases.
1955if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1956 !DstVT.isInteger())
1957returnfalse;
1958unsigned SrcBits = SrcVT.getSizeInBits();
1959unsigned DestBits = DstVT.getSizeInBits();
1960return (SrcBits == 64 && DestBits == 32);
1961}
1962
1963boolRISCVTargetLowering::isTruncateFree(SDValue Val,EVT VT2) const{
1964EVT SrcVT = Val.getValueType();
1965// Free truncate from vnsrl and vnsra.
1966if (Subtarget.hasVInstructions() &&
1967 (Val.getOpcode() ==ISD::SRL || Val.getOpcode() ==ISD::SRA) &&
1968 SrcVT.isVector() && VT2.isVector()) {
1969unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1970unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1971if (SrcBits == DestBits * 2) {
1972returntrue;
1973 }
1974 }
1975returnTargetLowering::isTruncateFree(Val, VT2);
1976}
1977
1978boolRISCVTargetLowering::isZExtFree(SDValue Val,EVT VT2) const{
1979// Zexts are free if they can be combined with a load.
1980// Don't advertise i32->i64 zextload as being free for RV64. It interacts
1981// poorly with type legalization of compares preferring sext.
1982if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1983EVT MemVT = LD->getMemoryVT();
1984if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1985 (LD->getExtensionType() ==ISD::NON_EXTLOAD ||
1986 LD->getExtensionType() ==ISD::ZEXTLOAD))
1987returntrue;
1988 }
1989
1990returnTargetLowering::isZExtFree(Val, VT2);
1991}
1992
1993boolRISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,EVT DstVT) const{
1994return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1995}
1996
1997boolRISCVTargetLowering::signExtendConstant(constConstantInt *CI) const{
1998return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1999}
2000
2001boolRISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const{
2002return Subtarget.hasStdExtZbb() ||
2003 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2004}
2005
2006boolRISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const{
2007return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2008 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2009}
2010
2011boolRISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2012constInstruction &AndI) const{
2013// We expect to be able to match a bit extraction instruction if the Zbs
2014// extension is supported and the mask is a power of two. However, we
2015// conservatively return false if the mask would fit in an ANDI instruction,
2016// on the basis that it's possible the sinking+duplication of the AND in
2017// CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2018// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2019if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2020returnfalse;
2021ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2022if (!Mask)
2023returnfalse;
2024return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2025}
2026
2027boolRISCVTargetLowering::hasAndNotCompare(SDValueY) const{
2028EVT VT =Y.getValueType();
2029
2030// FIXME: Support vectors once we have tests.
2031if (VT.isVector())
2032returnfalse;
2033
2034return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2035 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2036}
2037
2038boolRISCVTargetLowering::hasBitTest(SDValueX,SDValueY) const{
2039// Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2040if (Subtarget.hasStdExtZbs())
2041returnX.getValueType().isScalarInteger();
2042auto *C = dyn_cast<ConstantSDNode>(Y);
2043// XTheadBs provides th.tst (similar to bexti), if Y is a constant
2044if (Subtarget.hasVendorXTHeadBs())
2045returnC !=nullptr;
2046// We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2047returnC &&C->getAPIntValue().ule(10);
2048}
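// The ule(10) bound above keeps the single-bit mask (1 << Y) within ANDI's
// 12-bit signed-immediate range; 1 << 11 and beyond would need a separate
// constant materialization, defeating the ANDI+SEQZ/SNEZ sequence.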
2049
2050boolRISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
2051EVT VT) const{
2052// Only enable for rvv.
2053if (!VT.isVector() || !Subtarget.hasVInstructions())
2054returnfalse;
2055
2056if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2057returnfalse;
2058
2059returntrue;
2060}
2061
2062boolRISCVTargetLowering::shouldConvertConstantLoadToIntImm(constAPInt &Imm,
2063Type *Ty) const{
2064assert(Ty->isIntegerTy());
2065
2066unsigned BitSize = Ty->getIntegerBitWidth();
2067if (BitSize > Subtarget.getXLen())
2068returnfalse;
2069
2070// Fast path, assume 32-bit immediates are cheap.
2071 int64_t Val = Imm.getSExtValue();
2072if (isInt<32>(Val))
2073returntrue;
2074
2075// A constant pool entry may be more aligned than the load we're trying to
2076// replace. If we don't support unaligned scalar mem, prefer the constant
2077// pool.
2078// TODO: Can the caller pass down the alignment?
2079if (!Subtarget.enableUnalignedScalarMem())
2080returntrue;
2081
2082// Prefer to keep the load if it would require many instructions.
2083// This uses the same threshold we use for constant pools but doesn't
2084// check useConstantPoolForLargeInts.
2085// TODO: Should we keep the load only when we're definitely going to emit a
2086// constant pool?
2087
2088RISCVMatInt::InstSeq Seq =RISCVMatInt::generateInstSeq(Val, Subtarget);
2089return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2090}
2091
2092boolRISCVTargetLowering::
2093 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2094SDValueX,ConstantSDNode *XC,ConstantSDNode *CC,SDValueY,
2095unsigned OldShiftOpcode,unsigned NewShiftOpcode,
2096SelectionDAG &DAG) const{
2097// One interesting pattern that we'd want to form is 'bit extract':
2098// ((1 >> Y) & 1) ==/!= 0
2099// But we also need to be careful not to try to reverse that fold.
2100
2101// Is this '((1 >> Y) & 1)'?
2102if (XC && OldShiftOpcode ==ISD::SRL && XC->isOne())
2103returnfalse;// Keep the 'bit extract' pattern.
2104
2105// Will this be '((1 >> Y) & 1)' after the transform?
2106if (NewShiftOpcode ==ISD::SRL &&CC->isOne())
2107returntrue;// Do form the 'bit extract' pattern.
2108
2109// If 'X' is a constant, and we transform, then we will immediately
2110// try to undo the fold, thus causing endless combine loop.
2111// So only do the transform if X is not a constant. This matches the default
2112// implementation of this function.
2113return !XC;
2114}
2115
2116boolRISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const{
2117unsigned Opc = VecOp.getOpcode();
2118
2119// Assume target opcodes can't be scalarized.
2120// TODO - do we have any exceptions?
2121if (Opc >=ISD::BUILTIN_OP_END || !isBinOp(Opc))
2122returnfalse;
2123
2124// If the vector op is not supported, try to convert to scalar.
2125EVT VecVT = VecOp.getValueType();
2126if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2127returntrue;
2128
2129// If the vector op is supported, but the scalar op is not, the transform may
2130// not be worthwhile.
2131// Permit a vector binary operation to be converted to a scalar binary
2132// operation which is custom lowered with an illegal type.
2133EVT ScalarVT = VecVT.getScalarType();
2134returnisOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2135isOperationCustom(Opc, ScalarVT);
2136}
2137
2138boolRISCVTargetLowering::isOffsetFoldingLegal(
2139constGlobalAddressSDNode *GA) const{
2140// In order to maximise the opportunity for common subexpression elimination,
2141// keep a separate ADD node for the global address offset instead of folding
2142// it in the global address node. Later peephole optimisations may choose to
2143// fold it back in when profitable.
2144returnfalse;
2145}
2146
2147// Returns 0-31 if the fli instruction is available for the type and this is
2148// a legal FP immediate for the type. Returns -1 otherwise.
2149intRISCVTargetLowering::getLegalZfaFPImm(constAPFloat &Imm,EVT VT) const{
2150if (!Subtarget.hasStdExtZfa())
2151return -1;
2152
2153bool IsSupportedVT =false;
2154if (VT == MVT::f16) {
2155 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2156 }elseif (VT == MVT::f32) {
2157 IsSupportedVT =true;
2158 }elseif (VT == MVT::f64) {
2159assert(Subtarget.hasStdExtD() &&"Expect D extension");
2160 IsSupportedVT =true;
2161 }
2162
2163if (!IsSupportedVT)
2164return -1;
2165
2166returnRISCVLoadFPImm::getLoadFPImm(Imm);
2167}
2168
2169boolRISCVTargetLowering::isFPImmLegal(constAPFloat &Imm,EVT VT,
2170bool ForCodeSize) const{
2171bool IsLegalVT =false;
2172if (VT == MVT::f16)
2173 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2174elseif (VT == MVT::f32)
2175 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2176elseif (VT == MVT::f64)
2177 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2178elseif (VT == MVT::bf16)
2179 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2180
2181if (!IsLegalVT)
2182returnfalse;
2183
2184if (getLegalZfaFPImm(Imm, VT) >= 0)
2185returntrue;
2186
2187// Cannot create a 64 bit floating-point immediate value for rv32.
2188if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2189// td can handle +0.0 or -0.0 already.
2190// -0.0 can be created by fmv + fneg.
2191return Imm.isZero();
2192 }
2193
2194// Special case: fmv + fneg
2195if (Imm.isNegZero())
2196returntrue;
2197
2198// Building an integer and then converting requires a fmv at the end of
2199// the integer sequence. The fmv is not required for Zfinx.
2200constint FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2201constintCost =
2202 FmvCost +RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2203 Subtarget.getXLen(), Subtarget);
2204returnCost <=FPImmCost;
2205}
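// For example, with Zfa a value like +0.5f is covered by fli.s via
// getLegalZfaFPImm; without Zfa, +1.0f costs a single LUI-based integer
// sequence plus an fmv (total cost 2), which stays within the default
// fpimm-cost threshold, while more complex bit patterns typically fall back to
// a constant pool load.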
2206
2207// TODO: This is very conservative.
2208boolRISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT,EVT SrcVT,
2209unsigned Index) const{
2210if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2211returnfalse;
2212
2213// Only support extracting a fixed-length vector from a fixed-length vector for now.
2214if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2215returnfalse;
2216
2217EVT EltVT = ResVT.getVectorElementType();
2218assert(EltVT == SrcVT.getVectorElementType() &&"Should hold for node");
2219
2220// The smallest type we can slide is i8.
2221// TODO: We can extract index 0 from a mask vector without a slide.
2222if (EltVT == MVT::i1)
2223returnfalse;
2224
2225unsigned ResElts = ResVT.getVectorNumElements();
2226unsigned SrcElts = SrcVT.getVectorNumElements();
2227
2228unsigned MinVLen = Subtarget.getRealMinVLen();
2229unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2230
2231// If we're extracting only data from the first VLEN bits of the source
2232// then we can always do this with an m1 vslidedown.vx. Restricting the
2233// Index ensures we can use a vslidedown.vi.
2234// TODO: We can generalize this when the exact VLEN is known.
2235if (Index + ResElts <= MinVLMAX && Index < 31)
2236returntrue;
2237
2238// Conservatively only handle extracting half of a vector.
2239// TODO: We can do arbitrary slidedowns, but for now only support extracting
2240// the upper half of a vector until we have more test coverage.
2241// TODO: For sizes which aren't multiples of VLEN, this may not be
2242// a cheap extract. However, this case is important in practice for
2243// shuffled extracts of longer vectors. How should we resolve this?
2244return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2245}
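// E.g. extracting <4 x i32> at index 0 or 4 from <8 x i32> (either half) is
// reported as cheap, as is any extract whose data lies entirely within the
// first VLEN bits of the source with an index below 31 (so a vslidedown.vi
// suffices).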
2246
2247MVTRISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2248CallingConv::IDCC,
2249EVT VT) const{
2250// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2251// We might still end up using a GPR but that will be decided based on ABI.
2252if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2253 !Subtarget.hasStdExtZfhminOrZhinxmin())
2254return MVT::f32;
2255
2256MVT PartVT =TargetLowering::getRegisterTypeForCallingConv(Context,CC, VT);
2257
2258return PartVT;
2259}
2260
2261unsigned
2262RISCVTargetLowering::getNumRegisters(LLVMContext &Context,EVT VT,
2263 std::optional<MVT> RegisterVT) const{
2264// Pair inline assembly operand
2265if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2266 *RegisterVT == MVT::Untyped)
2267return 1;
2268
2269returnTargetLowering::getNumRegisters(Context, VT, RegisterVT);
2270}
2271
2272unsignedRISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2273CallingConv::IDCC,
2274EVT VT) const{
2275// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2276// We might still end up using a GPR but that will be decided based on ABI.
2277if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2278 !Subtarget.hasStdExtZfhminOrZhinxmin())
2279return 1;
2280
2281returnTargetLowering::getNumRegistersForCallingConv(Context,CC, VT);
2282}
2283
2284unsignedRISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2285LLVMContext &Context,CallingConv::IDCC,EVT VT,EVT &IntermediateVT,
2286unsigned &NumIntermediates,MVT &RegisterVT) const{
2287unsigned NumRegs =TargetLowering::getVectorTypeBreakdownForCallingConv(
2288 Context,CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2289
2290return NumRegs;
2291}
2292
2293// Changes the condition code and swaps operands if necessary, so the SetCC
2294// operation matches one of the comparisons supported directly by branches
2295// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2296// with 1/-1.
2297staticvoidtranslateSetCCForBranch(constSDLoc &DL,SDValue &LHS,SDValue &RHS,
2298ISD::CondCode &CC,SelectionDAG &DAG) {
2299// If this is a single bit test that can't be handled by ANDI, shift the
2300// bit to be tested to the MSB and perform a signed compare with 0.
2301if (isIntEqualitySetCC(CC) &&isNullConstant(RHS) &&
2302LHS.getOpcode() ==ISD::AND &&LHS.hasOneUse() &&
2303 isa<ConstantSDNode>(LHS.getOperand(1))) {
2304uint64_t Mask =LHS.getConstantOperandVal(1);
2305if ((isPowerOf2_64(Mask) ||isMask_64(Mask)) && !isInt<12>(Mask)) {
2306unsigned ShAmt = 0;
2307if (isPowerOf2_64(Mask)) {
2308CC =CC ==ISD::SETEQ ?ISD::SETGE :ISD::SETLT;
2309 ShAmt =LHS.getValueSizeInBits() - 1 -Log2_64(Mask);
2310 }else {
2311 ShAmt =LHS.getValueSizeInBits() -llvm::bit_width(Mask);
2312 }
2313
2314LHS =LHS.getOperand(0);
2315if (ShAmt != 0)
2316LHS = DAG.getNode(ISD::SHL,DL,LHS.getValueType(),LHS,
2317 DAG.getConstant(ShAmt,DL,LHS.getValueType()));
2318return;
2319 }
2320 }
2321
2322if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2323 int64_tC = RHSC->getSExtValue();
2324switch (CC) {
2325default:break;
2326caseISD::SETGT:
2327// Convert X > -1 to X >= 0.
2328if (C == -1) {
2329RHS = DAG.getConstant(0,DL,RHS.getValueType());
2330CC =ISD::SETGE;
2331return;
2332 }
2333break;
2334caseISD::SETLT:
2335// Convert X < 1 to 0 >= X.
2336if (C == 1) {
2337RHS =LHS;
2338LHS = DAG.getConstant(0,DL,RHS.getValueType());
2339CC =ISD::SETGE;
2340return;
2341 }
2342break;
2343 }
2344 }
2345
2346switch (CC) {
2347default:
2348break;
2349caseISD::SETGT:
2350caseISD::SETLE:
2351caseISD::SETUGT:
2352caseISD::SETULE:
2353CC =ISD::getSetCCSwappedOperands(CC);
2354std::swap(LHS,RHS);
2355break;
2356 }
2357}
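// E.g. on RV64 "(X & 0x100000) == 0" is rewritten as "(X << 43) >= 0" so the
// sign bit carries the tested bit, and a "setgt X, Y" becomes "setlt Y, X" so
// it maps directly onto BLT.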
2358
2359RISCVII::VLMULRISCVTargetLowering::getLMUL(MVT VT) {
2360if (VT.isRISCVVectorTuple()) {
2361if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2362 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2363returnRISCVII::LMUL_F8;
2364if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2365 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2366returnRISCVII::LMUL_F4;
2367if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2368 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2369returnRISCVII::LMUL_F2;
2370if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2371 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2372returnRISCVII::LMUL_1;
2373if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2374 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2375returnRISCVII::LMUL_2;
2376if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2377returnRISCVII::LMUL_4;
2378llvm_unreachable("Invalid vector tuple type LMUL.");
2379 }
2380
2381assert(VT.isScalableVector() &&"Expecting a scalable vector type");
2382unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2383if (VT.getVectorElementType() == MVT::i1)
2384 KnownSize *= 8;
2385
2386switch (KnownSize) {
2387default:
2388llvm_unreachable("Invalid LMUL.");
2389case 8:
2390returnRISCVII::VLMUL::LMUL_F8;
2391case 16:
2392returnRISCVII::VLMUL::LMUL_F4;
2393case 32:
2394returnRISCVII::VLMUL::LMUL_F2;
2395case 64:
2396returnRISCVII::VLMUL::LMUL_1;
2397case 128:
2398returnRISCVII::VLMUL::LMUL_2;
2399case 256:
2400returnRISCVII::VLMUL::LMUL_4;
2401case 512:
2402returnRISCVII::VLMUL::LMUL_8;
2403 }
2404}
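// E.g. nxv1i8 (8 known-min bits) is LMUL_F8, nxv2i32 (64 bits) is LMUL_1 and
// nxv8i32 (256 bits) is LMUL_4; i1 mask types are scaled by 8 first, so
// nxv16i1 classifies like nxv16i8 as LMUL_2.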
2405
2406unsignedRISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2407switch (LMul) {
2408default:
2409llvm_unreachable("Invalid LMUL.");
2410caseRISCVII::VLMUL::LMUL_F8:
2411caseRISCVII::VLMUL::LMUL_F4:
2412caseRISCVII::VLMUL::LMUL_F2:
2413caseRISCVII::VLMUL::LMUL_1:
2414return RISCV::VRRegClassID;
2415caseRISCVII::VLMUL::LMUL_2:
2416return RISCV::VRM2RegClassID;
2417caseRISCVII::VLMUL::LMUL_4:
2418return RISCV::VRM4RegClassID;
2419caseRISCVII::VLMUL::LMUL_8:
2420return RISCV::VRM8RegClassID;
2421 }
2422}
2423
2424unsignedRISCVTargetLowering::getSubregIndexByMVT(MVT VT,unsigned Index) {
2425RISCVII::VLMUL LMUL =getLMUL(VT);
2426if (LMUL ==RISCVII::VLMUL::LMUL_F8 ||
2427 LMUL ==RISCVII::VLMUL::LMUL_F4 ||
2428 LMUL ==RISCVII::VLMUL::LMUL_F2 ||
2429 LMUL ==RISCVII::VLMUL::LMUL_1) {
2430static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2431"Unexpected subreg numbering");
2432return RISCV::sub_vrm1_0 + Index;
2433 }
2434if (LMUL ==RISCVII::VLMUL::LMUL_2) {
2435static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2436"Unexpected subreg numbering");
2437return RISCV::sub_vrm2_0 + Index;
2438 }
2439if (LMUL ==RISCVII::VLMUL::LMUL_4) {
2440static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2441"Unexpected subreg numbering");
2442return RISCV::sub_vrm4_0 + Index;
2443 }
2444llvm_unreachable("Invalid vector type.");
2445}
2446
2447unsignedRISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2448if (VT.isRISCVVectorTuple()) {
2449unsigned NF = VT.getRISCVVectorTupleNumFields();
2450unsigned RegsPerField =
2451 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2452 (NF *RISCV::RVVBitsPerBlock));
2453switch (RegsPerField) {
2454case 1:
2455if (NF == 2)
2456return RISCV::VRN2M1RegClassID;
2457if (NF == 3)
2458return RISCV::VRN3M1RegClassID;
2459if (NF == 4)
2460return RISCV::VRN4M1RegClassID;
2461if (NF == 5)
2462return RISCV::VRN5M1RegClassID;
2463if (NF == 6)
2464return RISCV::VRN6M1RegClassID;
2465if (NF == 7)
2466return RISCV::VRN7M1RegClassID;
2467if (NF == 8)
2468return RISCV::VRN8M1RegClassID;
2469break;
2470case 2:
2471if (NF == 2)
2472return RISCV::VRN2M2RegClassID;
2473if (NF == 3)
2474return RISCV::VRN3M2RegClassID;
2475if (NF == 4)
2476return RISCV::VRN4M2RegClassID;
2477break;
2478case 4:
2479assert(NF == 2);
2480return RISCV::VRN2M4RegClassID;
2481default:
2482break;
2483 }
2484llvm_unreachable("Invalid vector tuple type RegClass.");
2485 }
2486
2487if (VT.getVectorElementType() == MVT::i1)
2488return RISCV::VRRegClassID;
2489returngetRegClassIDForLMUL(getLMUL(VT));
2490}
2491
2492// Attempt to decompose a subvector insert/extract between VecVT and
2493// SubVecVT via subregister indices. Returns the subregister index that
2494// can perform the subvector insert/extract with the given element index, as
2495// well as the index corresponding to any leftover subvectors that must be
2496// further inserted/extracted within the register class for SubVecVT.
2497std::pair<unsigned, unsigned>
2498RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2499MVT VecVT,MVT SubVecVT,unsigned InsertExtractIdx,
2500constRISCVRegisterInfo *TRI) {
2501static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2502 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2503 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2504"Register classes not ordered");
2505unsigned VecRegClassID =getRegClassIDForVecVT(VecVT);
2506unsigned SubRegClassID =getRegClassIDForVecVT(SubVecVT);
2507
2508// If VecVT is a vector tuple type, either it has the same register class as
2509// SubVecVT, or SubVecVT is actually a subvector of VecVT.
2510if (VecVT.isRISCVVectorTuple()) {
2511if (VecRegClassID == SubRegClassID)
2512return {RISCV::NoSubRegister, 0};
2513
2514assert(SubVecVT.isScalableVector() &&
2515"Only allow scalable vector subvector.");
2516assert(getLMUL(VecVT) ==getLMUL(SubVecVT) &&
2517"Invalid vector tuple insert/extract for vector and subvector with "
2518"different LMUL.");
2519return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2520 }
2521
2522// Try to compose a subregister index that takes us from the incoming
2523// LMUL>1 register class down to the outgoing one. At each step we halve
2524// the LMUL:
2525// nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2526// Note that this is not guaranteed to find a subregister index, such as
2527// when we are extracting from one VR type to another.
2528unsigned SubRegIdx = RISCV::NoSubRegister;
2529for (constunsigned RCID :
2530 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2531if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2532 VecVT = VecVT.getHalfNumVectorElementsVT();
2533bool IsHi =
2534 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2535 SubRegIdx =TRI->composeSubRegIndices(SubRegIdx,
2536getSubregIndexByMVT(VecVT, IsHi));
2537if (IsHi)
2538 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2539 }
2540return {SubRegIdx, InsertExtractIdx};
2541}
2542
2543// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2544// stores for those types.
2545bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const{
2546return !Subtarget.useRVVForFixedLengthVectors() ||
2547 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2548}
2549
2550boolRISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const{
2551if (!ScalarTy.isSimple())
2552returnfalse;
2553switch (ScalarTy.getSimpleVT().SimpleTy) {
2554case MVT::iPTR:
2555return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() :true;
2556case MVT::i8:
2557case MVT::i16:
2558case MVT::i32:
2559returntrue;
2560case MVT::i64:
2561return Subtarget.hasVInstructionsI64();
2562case MVT::f16:
2563return Subtarget.hasVInstructionsF16Minimal();
2564case MVT::bf16:
2565return Subtarget.hasVInstructionsBF16Minimal();
2566case MVT::f32:
2567return Subtarget.hasVInstructionsF32();
2568case MVT::f64:
2569return Subtarget.hasVInstructionsF64();
2570default:
2571returnfalse;
2572 }
2573}
2574
2575
2576unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const{
2577returnNumRepeatedDivisors;
2578}
2579
2580staticSDValuegetVLOperand(SDValueOp) {
2581assert((Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
2582Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN) &&
2583"Unexpected opcode");
2584bool HasChain =Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN;
2585unsigned IntNo =Op.getConstantOperandVal(HasChain ? 1 : 0);
2586constRISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2587 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2588if (!II)
2589returnSDValue();
2590returnOp.getOperand(II->VLOperand + 1 + HasChain);
2591}
2592
2593staticbooluseRVVForFixedLengthVectorVT(MVT VT,
2594constRISCVSubtarget &Subtarget) {
2595assert(VT.isFixedLengthVector() &&"Expected a fixed length vector type!");
2596if (!Subtarget.useRVVForFixedLengthVectors())
2597returnfalse;
2598
2599// We only support a set of vector types with a consistent maximum fixed size
2600// across all supported vector element types to avoid legalization issues.
2601// Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2602// fixed-length vector type we support is 1024 bytes.
2603if (VT.getFixedSizeInBits() > 1024 * 8)
2604returnfalse;
2605
2606unsigned MinVLen = Subtarget.getRealMinVLen();
2607
2608MVT EltVT = VT.getVectorElementType();
2609
2610// Don't use RVV for vectors we cannot scalarize if required.
2611switch (EltVT.SimpleTy) {
2612// i1 is supported but has different rules.
2613default:
2614returnfalse;
2615case MVT::i1:
2616// Masks can only use a single register.
2617if (VT.getVectorNumElements() > MinVLen)
2618returnfalse;
2619 MinVLen /= 8;
2620break;
2621case MVT::i8:
2622case MVT::i16:
2623case MVT::i32:
2624break;
2625case MVT::i64:
2626if (!Subtarget.hasVInstructionsI64())
2627returnfalse;
2628break;
2629case MVT::f16:
2630if (!Subtarget.hasVInstructionsF16Minimal())
2631returnfalse;
2632break;
2633case MVT::bf16:
2634if (!Subtarget.hasVInstructionsBF16Minimal())
2635returnfalse;
2636break;
2637case MVT::f32:
2638if (!Subtarget.hasVInstructionsF32())
2639returnfalse;
2640break;
2641case MVT::f64:
2642if (!Subtarget.hasVInstructionsF64())
2643returnfalse;
2644break;
2645 }
2646
2647// Reject elements larger than ELEN.
2648if (EltVT.getSizeInBits() > Subtarget.getELen())
2649returnfalse;
2650
2651unsigned LMul =divideCeil(VT.getSizeInBits(), MinVLen);
2652// Don't use RVV for types that don't fit.
2653if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2654returnfalse;
2655
2656// TODO: Perhaps an artificial restriction, but worth having whilst getting
2657// the base fixed length RVV support in place.
2658if (!VT.isPow2VectorType())
2659returnfalse;
2660
2661returntrue;
2662}
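// E.g. with a guaranteed minimum VLEN of 128, v4i32 and v16i8 are lowered with
// RVV, v8i64 additionally requires 64-bit vector elements (not available with
// Zve32*), v3i32 is rejected for not being a power-of-2 type, and anything
// needing more than the maximum fixed-length LMUL (8 by default) is rejected.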
2663
2664bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const{
2665 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2666}
2667
2668// Return the smallest legal scalable vector type with VT's element type that is
2669// guaranteed to hold a fixed-length vector of type VT.
2669staticMVTgetContainerForFixedLengthVector(constTargetLowering &TLI,MVT VT,
2670constRISCVSubtarget &Subtarget) {
2671// This may be called before legal types are setup.
2672assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2673useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2674"Expected legal fixed length vector!");
2675
2676unsigned MinVLen = Subtarget.getRealMinVLen();
2677unsigned MaxELen = Subtarget.getELen();
2678
2679MVT EltVT = VT.getVectorElementType();
2680switch (EltVT.SimpleTy) {
2681default:
2682llvm_unreachable("unexpected element type for RVV container");
2683case MVT::i1:
2684case MVT::i8:
2685case MVT::i16:
2686case MVT::i32:
2687case MVT::i64:
2688case MVT::bf16:
2689case MVT::f16:
2690case MVT::f32:
2691case MVT::f64: {
2692// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2693// narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2694// each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2695unsigned NumElts =
2696 (VT.getVectorNumElements() *RISCV::RVVBitsPerBlock) / MinVLen;
2697 NumElts = std::max(NumElts,RISCV::RVVBitsPerBlock / MaxELen);
2698assert(isPowerOf2_32(NumElts) &&"Expected power of 2 NumElts");
2699returnMVT::getScalableVectorVT(EltVT, NumElts);
2700 }
2701 }
2702}
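// E.g. with a 128-bit minimum VLEN, v4i32 is contained in nxv2i32 and v16i32
// in nxv8i32, following NumElts = (FixedNumElts * 64) / MinVLen, clamped so
// the container never drops below the smallest legal fractional LMUL.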
2703
2704staticMVTgetContainerForFixedLengthVector(SelectionDAG &DAG,MVT VT,
2705constRISCVSubtarget &Subtarget) {
2706returngetContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2707 Subtarget);
2708}
2709
2710MVTRISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const{
2711 return ::getContainerForFixedLengthVector(*this, VT,getSubtarget());
2712}
2713
2714// Grow V to consume an entire RVV register.
2715staticSDValueconvertToScalableVector(EVT VT,SDValue V,SelectionDAG &DAG,
2716constRISCVSubtarget &Subtarget) {
2717assert(VT.isScalableVector() &&
2718"Expected to convert into a scalable vector!");
2719assert(V.getValueType().isFixedLengthVector() &&
2720"Expected a fixed length vector operand!");
2721SDLocDL(V);
2722SDValue Zero = DAG.getVectorIdxConstant(0,DL);
2723return DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VT, DAG.getUNDEF(VT), V, Zero);
2724}
2725
2726// Shrink V so it's just big enough to maintain a VT's worth of data.
2727staticSDValueconvertFromScalableVector(EVT VT,SDValue V,SelectionDAG &DAG,
2728constRISCVSubtarget &Subtarget) {
2729assert(VT.isFixedLengthVector() &&
2730"Expected to convert into a fixed length vector!");
2731assert(V.getValueType().isScalableVector() &&
2732"Expected a scalable vector operand!");
2733SDLocDL(V);
2734SDValue Zero = DAG.getConstant(0,DL, Subtarget.getXLenVT());
2735return DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VT, V, Zero);
2736}
2737
2738/// Return the type of the mask type suitable for masking the provided
2739/// vector type. This is simply an i1 element type vector of the same
2740/// (possibly scalable) length.
2741staticMVTgetMaskTypeFor(MVT VecVT) {
2742assert(VecVT.isVector());
2743ElementCount EC = VecVT.getVectorElementCount();
2744returnMVT::getVectorVT(MVT::i1, EC);
2745}
2746
2747/// Creates an all ones mask suitable for masking a vector of type VecVT with
2748/// vector length VL.
2749staticSDValuegetAllOnesMask(MVT VecVT,SDValue VL,constSDLoc &DL,
2750SelectionDAG &DAG) {
2751MVT MaskVT =getMaskTypeFor(VecVT);
2752return DAG.getNode(RISCVISD::VMSET_VL,DL, MaskVT, VL);
2753}
2754
2755static std::pair<SDValue, SDValue>
2756getDefaultScalableVLOps(MVT VecVT,constSDLoc &DL,SelectionDAG &DAG,
2757constRISCVSubtarget &Subtarget) {
2758assert(VecVT.isScalableVector() &&"Expecting a scalable vector");
2759SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2760SDValue Mask =getAllOnesMask(VecVT, VL,DL, DAG);
2761return {Mask, VL};
2762}
2763
2764static std::pair<SDValue, SDValue>
2765getDefaultVLOps(uint64_t NumElts,MVT ContainerVT,constSDLoc &DL,
2766SelectionDAG &DAG,constRISCVSubtarget &Subtarget) {
2767assert(ContainerVT.isScalableVector() &&"Expecting scalable container type");
2768SDValue VL = DAG.getConstant(NumElts,DL, Subtarget.getXLenVT());
2769SDValue Mask =getAllOnesMask(ContainerVT, VL,DL, DAG);
2770return {Mask, VL};
2771}
2772
2773// Gets the two common "VL" operands: an all-ones mask and the vector length.
2774// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2775// the vector type that the fixed-length vector is contained in. Otherwise if
2776// VecVT is scalable, then ContainerVT should be the same as VecVT.
2777static std::pair<SDValue, SDValue>
2778getDefaultVLOps(MVT VecVT,MVT ContainerVT,constSDLoc &DL,SelectionDAG &DAG,
2779constRISCVSubtarget &Subtarget) {
2780if (VecVT.isFixedLengthVector())
2781returngetDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT,DL, DAG,
2782 Subtarget);
2783assert(ContainerVT.isScalableVector() &&"Expecting scalable container type");
2784returngetDefaultScalableVLOps(ContainerVT,DL, DAG, Subtarget);
2785}
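// For fixed-length vectors the VL returned here is the exact element count as
// a constant; for scalable types it is the X0 register, which vsetvli
// interprets as "use VLMAX".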
2786
2787SDValueRISCVTargetLowering::computeVLMax(MVT VecVT,constSDLoc &DL,
2788SelectionDAG &DAG) const{
2789assert(VecVT.isScalableVector() &&"Expected scalable vector");
2790return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2791 VecVT.getVectorElementCount());
2792}
2793
2794std::pair<unsigned, unsigned>
2795RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2796constRISCVSubtarget &Subtarget) {
2797assert(VecVT.isScalableVector() &&"Expected scalable vector");
2798
2799unsigned EltSize = VecVT.getScalarSizeInBits();
2800unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2801
2802unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2803unsigned MaxVLMAX =
2804RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2805
2806unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2807unsigned MinVLMAX =
2808RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2809
2810return std::make_pair(MinVLMAX, MaxVLMAX);
2811}
2812
2813// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2814// of either is (currently) supported. This can get us into an infinite loop
2815// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2816// as a ..., etc.
2817// Until either (or both) of these can reliably lower any node, reporting that
2818// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2819// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2820// which is not desirable.
2821boolRISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2822EVT VT,unsigned DefinedValues) const{
2823returnfalse;
2824}
2825
2826InstructionCostRISCVTargetLowering::getLMULCost(MVT VT) const{
2827// TODO: Here we assume the reciprocal throughput of an LMUL_1 operation is 1;
2828// it is implementation-defined.
2829if (!VT.isVector())
2830returnInstructionCost::getInvalid();
2831unsigned DLenFactor = Subtarget.getDLenFactor();
2832unsignedCost;
2833if (VT.isScalableVector()) {
2834unsigned LMul;
2835bool Fractional;
2836 std::tie(LMul, Fractional) =
2837RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2838if (Fractional)
2839Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2840else
2841Cost = (LMul * DLenFactor);
2842 }else {
2843Cost =divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2844 }
2845returnCost;
2846}
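// E.g. an LMUL=4 scalable type costs 4 (or 8 when DLEN is half of VLEN), while
// fractional-LMUL types effectively cost 1; fixed-length types are costed by
// how many DLEN-sized chunks they occupy.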
2847
2848
2849/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2850/// is generally quadratic in the number of vregs implied by LMUL. Note that
2851/// the operands (index and possibly mask) are handled separately.
2852InstructionCostRISCVTargetLowering::getVRGatherVVCost(MVT VT) const{
2853returngetLMULCost(VT) *getLMULCost(VT);
2854}
2855
2856/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2857/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2859InstructionCostRISCVTargetLowering::getVRGatherVICost(MVT VT) const{
2860returngetLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2867InstructionCostRISCVTargetLowering::getVSlideVXCost(MVT VT) const{
2868returngetLMULCost(VT);
2869}
2870
2871/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2872/// for the type VT. (This does not cover the vslide1up or vslide1down
2873/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2874/// or may track the vrgather.vv cost. It is implementation-dependent.
2875InstructionCostRISCVTargetLowering::getVSlideVICost(MVT VT) const{
2876returngetLMULCost(VT);
2877}
2878
2879staticSDValuelowerINT_TO_FP(SDValueOp,SelectionDAG &DAG,
2880constRISCVSubtarget &Subtarget) {
2881// f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2882// bf16 conversions are always promoted to f32.
2883if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2884Op.getValueType() == MVT::bf16) {
2885bool IsStrict =Op->isStrictFPOpcode();
2886
2887SDLocDL(Op);
2888if (IsStrict) {
2889SDValue Val = DAG.getNode(Op.getOpcode(),DL, {MVT::f32, MVT::Other},
2890 {Op.getOperand(0), Op.getOperand(1)});
2891return DAG.getNode(ISD::STRICT_FP_ROUND,DL,
2892 {Op.getValueType(), MVT::Other},
2893 {Val.getValue(1), Val.getValue(0),
2894 DAG.getIntPtrConstant(0,DL,/*isTarget=*/true)});
2895 }
2896return DAG.getNode(
2897ISD::FP_ROUND,DL,Op.getValueType(),
2898 DAG.getNode(Op.getOpcode(),DL, MVT::f32,Op.getOperand(0)),
2899 DAG.getIntPtrConstant(0,DL,/*isTarget=*/true));
2900 }
2901
2902// Other operations are legal.
2903returnOp;
2904}
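// E.g. a sitofp/uitofp producing bf16 is emitted as an integer-to-f32 convert
// followed by an FP_ROUND back to bf16; the strict variants thread the chain
// through STRICT_FP_ROUND instead.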
2905
2906staticSDValuelowerFP_TO_INT_SAT(SDValueOp,SelectionDAG &DAG,
2907constRISCVSubtarget &Subtarget) {
2908// RISC-V FP-to-int conversions saturate to the destination register size, but
2909// don't produce 0 for nan. We can use a conversion instruction and fix the
2910// nan case with a compare and a select.
2911SDValue Src =Op.getOperand(0);
2912
2913MVT DstVT =Op.getSimpleValueType();
2914EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2915
2916bool IsSigned =Op.getOpcode() ==ISD::FP_TO_SINT_SAT;
2917
2918if (!DstVT.isVector()) {
2919// For bf16 or for f16 in the absence of Zfh, promote to f32, then saturate
2920// the result.
2921if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2922 Src.getValueType() == MVT::bf16) {
2923 Src = DAG.getNode(ISD::FP_EXTEND,SDLoc(Op), MVT::f32, Src);
2924 }
2925
2926unsigned Opc;
2927if (SatVT == DstVT)
2928 Opc = IsSigned ?RISCVISD::FCVT_X :RISCVISD::FCVT_XU;
2929elseif (DstVT == MVT::i64 && SatVT == MVT::i32)
2930 Opc = IsSigned ?RISCVISD::FCVT_W_RV64 :RISCVISD::FCVT_WU_RV64;
2931else
2932returnSDValue();
2933// FIXME: Support other SatVTs by clamping before or after the conversion.
2934
2935SDLocDL(Op);
2936SDValue FpToInt = DAG.getNode(
2937 Opc,DL, DstVT, Src,
2938 DAG.getTargetConstant(RISCVFPRndMode::RTZ,DL, Subtarget.getXLenVT()));
2939
2940if (Opc ==RISCVISD::FCVT_WU_RV64)
2941 FpToInt = DAG.getZeroExtendInReg(FpToInt,DL, MVT::i32);
2942
2943SDValue ZeroInt = DAG.getConstant(0,DL, DstVT);
2944return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2945ISD::CondCode::SETUO);
2946 }
2947
2948// Vectors.
2949
2950MVT DstEltVT = DstVT.getVectorElementType();
2951MVT SrcVT = Src.getSimpleValueType();
2952MVT SrcEltVT = SrcVT.getVectorElementType();
2953unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2954unsigned DstEltSize = DstEltVT.getSizeInBits();
2955
2956// Only handle saturating to the destination type.
2957if (SatVT != DstEltVT)
2958returnSDValue();
2959
2960MVT DstContainerVT = DstVT;
2961MVT SrcContainerVT = SrcVT;
2962if (DstVT.isFixedLengthVector()) {
2963 DstContainerVT =getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2964 SrcContainerVT =getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2965assert(DstContainerVT.getVectorElementCount() ==
2966 SrcContainerVT.getVectorElementCount() &&
2967"Expected same element count");
2968 Src =convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2969 }
2970
2971SDLocDL(Op);
2972
2973auto [Mask, VL] =getDefaultVLOps(DstVT, DstContainerVT,DL, DAG, Subtarget);
2974
2975SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL,DL, Mask.getValueType(),
2976 {Src, Src, DAG.getCondCode(ISD::SETNE),
2977 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2978
2979// Need to widen by more than 1 step, promote the FP type, then do a widening
2980// convert.
2981if (DstEltSize > (2 * SrcEltSize)) {
2982assert(SrcContainerVT.getVectorElementType() == MVT::f16 &&"Unexpected VT!");
2983MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2984 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL,DL, InterVT, Src, Mask, VL);
2985 }
2986
2987MVT CvtContainerVT = DstContainerVT;
2988MVT CvtEltVT = DstEltVT;
2989if (SrcEltSize > (2 * DstEltSize)) {
2990 CvtEltVT =MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2991 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2992 }
2993
2994unsigned RVVOpc =
2995 IsSigned ?RISCVISD::VFCVT_RTZ_X_F_VL :RISCVISD::VFCVT_RTZ_XU_F_VL;
2996SDValue Res = DAG.getNode(RVVOpc,DL, CvtContainerVT, Src, Mask, VL);
2997
2998while (CvtContainerVT != DstContainerVT) {
2999 CvtEltVT =MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3000 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3001// Rounding mode here is arbitrary since we aren't shifting out any bits.
3002unsigned ClipOpc = IsSigned ?RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3003 :RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3004 Res = DAG.getNode(ClipOpc,DL, CvtContainerVT, Res, Mask, VL);
3005 }
3006
3007SDValue SplatZero = DAG.getNode(
3008RISCVISD::VMV_V_X_VL,DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3009 DAG.getConstant(0,DL, Subtarget.getXLenVT()), VL);
3010 Res = DAG.getNode(RISCVISD::VMERGE_VL,DL, DstContainerVT, IsNan, SplatZero,
3011 Res, DAG.getUNDEF(DstContainerVT), VL);
3012
3013if (DstVT.isFixedLengthVector())
3014 Res =convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3015
3016return Res;
3017}
3018
3019staticSDValuelowerFP_TO_INT(SDValueOp,SelectionDAG &DAG,
3020constRISCVSubtarget &Subtarget) {
3021bool IsStrict =Op->isStrictFPOpcode();
3022SDValue SrcVal =Op.getOperand(IsStrict ? 1 : 0);
3023
3024// f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3025// bf16 conversions are always promoted to f32.
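// For example, (fp_to_sint bf16:$x) becomes (fp_to_sint (fp_extend f32, bf16:$x));
// for the STRICT_ variants the chain produced by STRICT_FP_EXTEND is threaded
// into the converted node.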
3026if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3027 SrcVal.getValueType() == MVT::bf16) {
3028SDLocDL(Op);
3029if (IsStrict) {
3030SDValue Ext =
3031 DAG.getNode(ISD::STRICT_FP_EXTEND,DL, {MVT::f32, MVT::Other},
3032 {Op.getOperand(0), SrcVal});
3033return DAG.getNode(Op.getOpcode(),DL, {Op.getValueType(), MVT::Other},
3034 {Ext.getValue(1), Ext.getValue(0)});
3035 }
3036return DAG.getNode(Op.getOpcode(),DL,Op.getValueType(),
3037 DAG.getNode(ISD::FP_EXTEND,DL, MVT::f32, SrcVal));
3038 }
3039
3040// Other operations are legal.
3041returnOp;
3042}
3043
3044staticRISCVFPRndMode::RoundingModematchRoundingOp(unsigned Opc) {
3045switch (Opc) {
3046caseISD::FROUNDEVEN:
3047caseISD::STRICT_FROUNDEVEN:
3048case ISD::VP_FROUNDEVEN:
3049returnRISCVFPRndMode::RNE;
3050caseISD::FTRUNC:
3051caseISD::STRICT_FTRUNC:
3052case ISD::VP_FROUNDTOZERO:
3053returnRISCVFPRndMode::RTZ;
3054caseISD::FFLOOR:
3055caseISD::STRICT_FFLOOR:
3056case ISD::VP_FFLOOR:
3057returnRISCVFPRndMode::RDN;
3058caseISD::FCEIL:
3059caseISD::STRICT_FCEIL:
3060case ISD::VP_FCEIL:
3061returnRISCVFPRndMode::RUP;
3062caseISD::FROUND:
3063caseISD::STRICT_FROUND:
3064case ISD::VP_FROUND:
3065returnRISCVFPRndMode::RMM;
3066caseISD::FRINT:
3067case ISD::VP_FRINT:
3068returnRISCVFPRndMode::DYN;
3069 }
3070
3071returnRISCVFPRndMode::Invalid;
3072}
3073
3074// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3075// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3076// the integer domain and back, taking care to avoid converting values that are
3077// NaN or already correct.
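// The sequence emitted below is roughly: freeze the source, take vfabs, build
// a mask of lanes whose magnitude is below the largest exactly-representable
// integer (an ordered compare, so NaN lanes are excluded), do a masked convert
// to integer with the requested static rounding mode, convert back, and
// finally copy the original sign so that -0.0 is preserved.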
3078staticSDValue
3079lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValueOp,SelectionDAG &DAG,
3080constRISCVSubtarget &Subtarget) {
3081MVT VT =Op.getSimpleValueType();
3082assert(VT.isVector() &&"Unexpected type");
3083
3084SDLocDL(Op);
3085
3086SDValue Src =Op.getOperand(0);
3087
3088MVT ContainerVT = VT;
3089if (VT.isFixedLengthVector()) {
3090 ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
3091 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3092 }
3093
3094SDValue Mask, VL;
3095if (Op->isVPOpcode()) {
3096 Mask =Op.getOperand(1);
3097if (VT.isFixedLengthVector())
3098 Mask =convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3099 Subtarget);
3100 VL =Op.getOperand(2);
3101 }else {
3102 std::tie(Mask, VL) =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
3103 }
3104
3105// Freeze the source since we are increasing the number of uses.
3106 Src = DAG.getFreeze(Src);
3107
3108// We do the conversion on the absolute value and fix the sign at the end.
3109SDValue Abs = DAG.getNode(RISCVISD::FABS_VL,DL, ContainerVT, Src, Mask, VL);
3110
3111// Determine the largest integer that can be represented exactly. This and
3112// values larger than it don't have any fractional bits so don't need to
3113// be converted.
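// For example, for f32 (24 bits of precision) MaxVal is 2^23; any f32 whose
// magnitude is at least 2^23 is already an integer.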
3114constfltSemantics &FltSem = ContainerVT.getFltSemantics();
3115unsigned Precision =APFloat::semanticsPrecision(FltSem);
3116APFloat MaxVal =APFloat(FltSem);
3117 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3118/*IsSigned*/false,APFloat::rmNearestTiesToEven);
3119SDValue MaxValNode =
3120 DAG.getConstantFP(MaxVal,DL, ContainerVT.getVectorElementType());
3121SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL,DL, ContainerVT,
3122 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3123
3124// If abs(Src) was larger than MaxVal or nan, keep it.
3125MVT SetccVT =MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3126 Mask =
3127 DAG.getNode(RISCVISD::SETCC_VL,DL, SetccVT,
3128 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3129 Mask, Mask, VL});
3130
3131// Truncate to integer and convert back to FP.
3132MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3133MVT XLenVT = Subtarget.getXLenVT();
3134SDValue Truncated;
3135
3136switch (Op.getOpcode()) {
3137default:
3138llvm_unreachable("Unexpected opcode");
3139caseISD::FRINT:
3140case ISD::VP_FRINT:
3141caseISD::FCEIL:
3142case ISD::VP_FCEIL:
3143caseISD::FFLOOR:
3144case ISD::VP_FFLOOR:
3145caseISD::FROUND:
3146caseISD::FROUNDEVEN:
3147case ISD::VP_FROUND:
3148case ISD::VP_FROUNDEVEN:
3149case ISD::VP_FROUNDTOZERO: {
3150RISCVFPRndMode::RoundingMode FRM =matchRoundingOp(Op.getOpcode());
3151assert(FRM !=RISCVFPRndMode::Invalid);
3152 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL,DL, IntVT, Src, Mask,
3153 DAG.getTargetConstant(FRM,DL, XLenVT), VL);
3154break;
3155 }
3156caseISD::FTRUNC:
3157 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL,DL, IntVT, Src,
3158 Mask, VL);
3159break;
3160caseISD::FNEARBYINT:
3161case ISD::VP_FNEARBYINT:
3162 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL,DL, ContainerVT, Src,
3163 Mask, VL);
3164break;
3165 }
3166
3167// VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3168if (Truncated.getOpcode() !=RISCVISD::VFROUND_NOEXCEPT_VL)
3169 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL,DL, ContainerVT, Truncated,
3170 Mask, VL);
3171
3172// Restore the original sign so that -0.0 is preserved.
3173 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL,DL, ContainerVT, Truncated,
3174 Src, Src, Mask, VL);
3175
3176if (!VT.isFixedLengthVector())
3177return Truncated;
3178
3179returnconvertFromScalableVector(VT, Truncated, DAG, Subtarget);
3180}
3181
3182// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3183// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3184// to a qNaN and then converting the new source to integer and back to FP.
3185staticSDValue
3186lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValueOp,SelectionDAG &DAG,
3187constRISCVSubtarget &Subtarget) {
3188SDLocDL(Op);
3189MVT VT =Op.getSimpleValueType();
3190SDValue Chain =Op.getOperand(0);
3191SDValue Src =Op.getOperand(1);
3192
3193MVT ContainerVT = VT;
3194if (VT.isFixedLengthVector()) {
3195 ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3197 }
3198
3199auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
3200
3201// Freeze the source since we are increasing the number of uses.
3202 Src = DAG.getFreeze(Src);
3203
3204// Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3205MVT MaskVT = Mask.getSimpleValueType();
3206SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL,DL,
3207 DAG.getVTList(MaskVT, MVT::Other),
3208 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3209 DAG.getUNDEF(MaskVT), Mask, VL});
3210 Chain = Unorder.getValue(1);
3211 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL,DL,
3212 DAG.getVTList(ContainerVT, MVT::Other),
3213 {Chain, Src, Src, Src, Unorder, VL});
3214 Chain = Src.getValue(1);
3215
3216// We do the conversion on the absolute value and fix the sign at the end.
3217SDValue Abs = DAG.getNode(RISCVISD::FABS_VL,DL, ContainerVT, Src, Mask, VL);
3218
3219// Determine the largest integer that can be represented exactly. This and
3220// values larger than it don't have any fractional bits so don't need to
3221// be converted.
3222constfltSemantics &FltSem = ContainerVT.getFltSemantics();
3223unsigned Precision =APFloat::semanticsPrecision(FltSem);
3224APFloat MaxVal =APFloat(FltSem);
3225 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3226/*IsSigned*/false,APFloat::rmNearestTiesToEven);
3227SDValue MaxValNode =
3228 DAG.getConstantFP(MaxVal,DL, ContainerVT.getVectorElementType());
3229SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL,DL, ContainerVT,
3230 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3231
3232// If abs(Src) was larger than MaxVal or nan, keep it.
3233 Mask = DAG.getNode(
3234RISCVISD::SETCC_VL,DL, MaskVT,
3235 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3236
3237// Truncate to integer and convert back to FP.
3238MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3239MVT XLenVT = Subtarget.getXLenVT();
3240SDValue Truncated;
3241
3242switch (Op.getOpcode()) {
3243default:
3244llvm_unreachable("Unexpected opcode");
3245caseISD::STRICT_FCEIL:
3246caseISD::STRICT_FFLOOR:
3247caseISD::STRICT_FROUND:
3248caseISD::STRICT_FROUNDEVEN: {
3249RISCVFPRndMode::RoundingMode FRM =matchRoundingOp(Op.getOpcode());
3250assert(FRM !=RISCVFPRndMode::Invalid);
3251 Truncated = DAG.getNode(
3252RISCVISD::STRICT_VFCVT_RM_X_F_VL,DL, DAG.getVTList(IntVT, MVT::Other),
3253 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3254break;
3255 }
3256caseISD::STRICT_FTRUNC:
3257 Truncated =
3258 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL,DL,
3259 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3260break;
3261caseISD::STRICT_FNEARBYINT:
3262 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL,DL,
3263 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3264 Mask, VL);
3265break;
3266 }
3267 Chain = Truncated.getValue(1);
3268
3269// VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3270if (Op.getOpcode() !=ISD::STRICT_FNEARBYINT) {
3271 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL,DL,
3272 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3273 Truncated, Mask, VL);
3274 Chain = Truncated.getValue(1);
3275 }
3276
3277// Restore the original sign so that -0.0 is preserved.
3278 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL,DL, ContainerVT, Truncated,
3279 Src, Src, Mask, VL);
3280
3281if (VT.isFixedLengthVector())
3282 Truncated =convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3283return DAG.getMergeValues({Truncated, Chain},DL);
3284}
3285
3286staticSDValue
3287lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValueOp,SelectionDAG &DAG,
3288constRISCVSubtarget &Subtarget) {
3289MVT VT =Op.getSimpleValueType();
3290if (VT.isVector())
3291returnlowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3292
3293if (DAG.shouldOptForSize())
3294returnSDValue();
3295
3296SDLocDL(Op);
3297SDValue Src =Op.getOperand(0);
3298
3299// Create an integer the size of the mantissa with the MSB set. This and all
3300// values larger than it don't have any fractional bits so don't need to be
3301// converted.
3302constfltSemantics &FltSem = VT.getFltSemantics();
3303unsigned Precision =APFloat::semanticsPrecision(FltSem);
3304APFloat MaxVal =APFloat(FltSem);
3305 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3306/*IsSigned*/false,APFloat::rmNearestTiesToEven);
3307SDValue MaxValNode = DAG.getConstantFP(MaxVal,DL, VT);
3308
3309RISCVFPRndMode::RoundingMode FRM =matchRoundingOp(Op.getOpcode());
3310return DAG.getNode(RISCVISD::FROUND,DL, VT, Src, MaxValNode,
3311 DAG.getTargetConstant(FRM,DL, Subtarget.getXLenVT()));
3312}
3313
3314// Expand vector LRINT and LLRINT by converting to the integer domain.
3315staticSDValuelowerVectorXRINT(SDValueOp,SelectionDAG &DAG,
3316constRISCVSubtarget &Subtarget) {
3317MVT VT =Op.getSimpleValueType();
3318assert(VT.isVector() &&"Unexpected type");
3319
3320SDLocDL(Op);
3321SDValue Src =Op.getOperand(0);
3322MVT ContainerVT = VT;
3323
3324if (VT.isFixedLengthVector()) {
3325 ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
3326 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3327 }
3328
3329auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
3330SDValue Truncated = DAG.getNode(
3331RISCVISD::VFCVT_RM_X_F_VL,DL, ContainerVT, Src, Mask,
3332 DAG.getTargetConstant(RISCVFPRndMode::DYN,DL, Subtarget.getXLenVT()),
3333 VL);
3334
3335if (!VT.isFixedLengthVector())
3336return Truncated;
3337
3338returnconvertFromScalableVector(VT, Truncated, DAG, Subtarget);
3339}
3340
3341staticSDValue
3342getVSlidedown(SelectionDAG &DAG,constRISCVSubtarget &Subtarget,
3343constSDLoc &DL,EVT VT,SDValue Passthru,SDValueOp,
3344SDValueOffset,SDValue Mask,SDValue VL,
3345unsigned Policy =RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3346if (Passthru.isUndef())
3347 Policy =RISCVII::TAIL_AGNOSTIC |RISCVII::MASK_AGNOSTIC;
3348SDValue PolicyOp = DAG.getTargetConstant(Policy,DL, Subtarget.getXLenVT());
3349SDValue Ops[] = {Passthru,Op,Offset, Mask, VL, PolicyOp};
3350return DAG.getNode(RISCVISD::VSLIDEDOWN_VL,DL, VT, Ops);
3351}
3352
3353staticSDValue
3354getVSlideup(SelectionDAG &DAG,constRISCVSubtarget &Subtarget,constSDLoc &DL,
3355EVT VT,SDValue Passthru,SDValueOp,SDValueOffset,SDValue Mask,
3356SDValue VL,
3357unsigned Policy =RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3358if (Passthru.isUndef())
3359 Policy =RISCVII::TAIL_AGNOSTIC |RISCVII::MASK_AGNOSTIC;
3360SDValue PolicyOp = DAG.getTargetConstant(Policy,DL, Subtarget.getXLenVT());
3361SDValue Ops[] = {Passthru,Op,Offset, Mask, VL, PolicyOp};
3362return DAG.getNode(RISCVISD::VSLIDEUP_VL,DL, VT, Ops);
3363}
3364
3365staticMVTgetLMUL1VT(MVT VT) {
3366assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3367"Unexpected vector MVT");
3368returnMVT::getScalableVectorVT(
3369 VT.getVectorElementType(),
3370RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3371}
3372
3373structVIDSequence {
3374 int64_tStepNumerator;
3375unsignedStepDenominator;
3376 int64_tAddend;
3377};
3378
3379static std::optional<APInt>getExactInteger(constAPFloat &APF,
3380uint32_tBitWidth) {
3381// We will use a SINT_TO_FP to materialize this constant so we should use a
3382// signed APSInt here.
3383APSInt ValInt(BitWidth,/*IsUnsigned*/false);
3384// We use an arbitrary rounding mode here. If a floating-point value is an exact
3385// integer (e.g., 1.0), the rounding mode does not affect the output value. If
3386// the rounding mode changes the output value, then it is not an exact
3387// integer.
3388RoundingMode ArbitraryRM =RoundingMode::TowardZero;
3389bool IsExact;
3390// If it is out of signed integer range, it will return an invalid operation.
3391// If it is not an exact integer, IsExact is false.
3392if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3393APFloatBase::opInvalidOp) ||
3394 !IsExact)
3395return std::nullopt;
3396return ValInt.extractBits(BitWidth, 0);
3397}
3398
3399// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3400// to the (non-zero) step S and start value X. This can be then lowered as the
3401// RVV sequence (VID * S) + X, for example.
3402// The step S is represented as an integer numerator divided by a positive
3403// denominator. Note that the implementation currently only identifies
3404// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3405// cannot detect 2/3, for example.
3406// Note that this method will also match potentially unappealing index
3407// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3408// determine whether this is worth generating code for.
3409//
3410// EltSizeInBits is the size of the type that the sequence will be calculated
3411// in, i.e. SEW for build_vectors or XLEN for address calculations.
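// For example, <1, 3, 5, 7> is matched as {StepNumerator=2, StepDenominator=1,
// Addend=1} (a caller can lower it as (vid << 1) + 1), and <0, 0, 1, 1> is
// matched as {1, 2, 0} (vid >> 1).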
3412static std::optional<VIDSequence>isSimpleVIDSequence(SDValueOp,
3413unsigned EltSizeInBits) {
3414assert(Op.getOpcode() ==ISD::BUILD_VECTOR &&"Unexpected BUILD_VECTOR");
3415if (!cast<BuildVectorSDNode>(Op)->isConstant())
3416return std::nullopt;
3417bool IsInteger =Op.getValueType().isInteger();
3418
3419 std::optional<unsigned> SeqStepDenom;
3420 std::optional<APInt> SeqStepNum;
3421 std::optional<APInt> SeqAddend;
3422 std::optional<std::pair<APInt, unsigned>> PrevElt;
3423assert(EltSizeInBits >=Op.getValueType().getScalarSizeInBits());
3424
3425// First extract the ops into a list of constant integer values. This may not
3426// be possible for floats if they're not all representable as integers.
3427SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3428constunsigned OpSize =Op.getScalarValueSizeInBits();
3429for (auto [Idx, Elt] :enumerate(Op->op_values())) {
3430if (Elt.isUndef()) {
3431 Elts[Idx] = std::nullopt;
3432continue;
3433 }
3434if (IsInteger) {
3435 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3436 }else {
3437auto ExactInteger =
3438getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3439if (!ExactInteger)
3440return std::nullopt;
3441 Elts[Idx] = *ExactInteger;
3442 }
3443 }
3444
3445for (auto [Idx, Elt] :enumerate(Elts)) {
3446// Assume undef elements match the sequence; we just have to be careful
3447// when interpolating across them.
3448if (!Elt)
3449continue;
3450
3451if (PrevElt) {
3452// Calculate the step since the last non-undef element, and ensure
3453// it's consistent across the entire sequence.
3454unsigned IdxDiff =Idx - PrevElt->second;
3455APInt ValDiff = *Elt - PrevElt->first;
3456
3457// A zero value difference means that we're somewhere in the middle
3458// of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3459// step change before evaluating the sequence.
3460if (ValDiff == 0)
3461continue;
3462
3463 int64_t Remainder = ValDiff.srem(IdxDiff);
3464// Normalize the step if it's greater than 1.
3465if (Remainder != ValDiff.getSExtValue()) {
3466// The difference must cleanly divide the element span.
3467if (Remainder != 0)
3468return std::nullopt;
3469 ValDiff = ValDiff.sdiv(IdxDiff);
3470 IdxDiff = 1;
3471 }
3472
3473if (!SeqStepNum)
3474 SeqStepNum = ValDiff;
3475elseif (ValDiff != SeqStepNum)
3476return std::nullopt;
3477
3478if (!SeqStepDenom)
3479 SeqStepDenom = IdxDiff;
3480elseif (IdxDiff != *SeqStepDenom)
3481return std::nullopt;
3482 }
3483
3484// Record this non-undef element for later.
3485if (!PrevElt || PrevElt->first != *Elt)
3486 PrevElt = std::make_pair(*Elt,Idx);
3487 }
3488
3489// We need to have logged a step for this to count as a legal index sequence.
3490if (!SeqStepNum || !SeqStepDenom)
3491return std::nullopt;
3492
3493// Loop back through the sequence and validate elements we might have skipped
3494// while waiting for a valid step. While doing this, log any sequence addend.
3495for (auto [Idx, Elt] :enumerate(Elts)) {
3496if (!Elt)
3497continue;
3498APInt ExpectedVal =
3499 (APInt(EltSizeInBits,Idx,/*isSigned=*/false,/*implicitTrunc=*/true) *
3500 *SeqStepNum)
3501 .sdiv(*SeqStepDenom);
3502
3503APInt Addend = *Elt - ExpectedVal;
3504if (!SeqAddend)
3505 SeqAddend = Addend;
3506elseif (Addend != SeqAddend)
3507return std::nullopt;
3508 }
3509
3510assert(SeqAddend &&"Must have an addend if we have a step");
3511
3512returnVIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3513 SeqAddend->getSExtValue()};
3514}
3515
3516// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3517// and lower it as a VRGATHER_VX_VL from the source vector.
3518staticSDValuematchSplatAsGather(SDValue SplatVal,MVT VT,constSDLoc &DL,
3519SelectionDAG &DAG,
3520constRISCVSubtarget &Subtarget) {
3521if (SplatVal.getOpcode() !=ISD::EXTRACT_VECTOR_ELT)
3522returnSDValue();
3523SDValue Src = SplatVal.getOperand(0);
3524// Don't perform this optimization for i1 vectors, or if the element types are
3525// different
3526// FIXME: Support i1 vectors, maybe by promoting to i8?
3527MVT EltTy = VT.getVectorElementType();
3528MVT SrcVT = Src.getSimpleValueType();
3529if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3530returnSDValue();
3531SDValueIdx = SplatVal.getOperand(1);
3532// The index must be a legal type.
3533if (Idx.getValueType() != Subtarget.getXLenVT())
3534returnSDValue();
3535
3536// Check that we know Idx lies within VT
3537if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3538auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3539if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3540returnSDValue();
3541 }
3542
3543// Convert fixed length vectors to scalable
3544MVT ContainerVT = VT;
3545if (VT.isFixedLengthVector())
3546 ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
3547
3548MVT SrcContainerVT = SrcVT;
3549if (SrcVT.isFixedLengthVector()) {
3550 SrcContainerVT =getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3551 Src =convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3552 }
3553
3554// Put Src in a ContainerVT-sized vector
3555if (SrcContainerVT.getVectorMinNumElements() <
3556 ContainerVT.getVectorMinNumElements())
3557 Src = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ContainerVT,
3558 DAG.getUNDEF(ContainerVT), Src,
3559 DAG.getVectorIdxConstant(0,DL));
3560else
3561 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ContainerVT, Src,
3562 DAG.getVectorIdxConstant(0,DL));
3563
3564// We checked that Idx fits inside VT earlier
3565auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
3566SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL,DL, ContainerVT, Src,
3567Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3568if (VT.isFixedLengthVector())
3569 Gather =convertFromScalableVector(VT, Gather, DAG, Subtarget);
3570return Gather;
3571}
3572
3573/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3574/// which constitute a large proportion of the elements. In such cases we can
3575/// splat a vector with the dominant element and make up the shortfall with
3576/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3577/// Note that this includes vectors of 2 elements by association. The
3578/// upper-most element is the "dominant" one, allowing us to use a splat to
3579/// "insert" the upper element, and an insert of the lower element at position
3580/// 0, which improves codegen.
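/// For example, <2, 2, 9, 2, 2, 2, 2, 2> can be built as a splat of 2 followed
/// by a single INSERT_VECTOR_ELT of 9 at index 2, rather than a chain of eight
/// scalar inserts or slides.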
3581staticSDValuelowerBuildVectorViaDominantValues(SDValueOp,SelectionDAG &DAG,
3582constRISCVSubtarget &Subtarget) {
3583MVT VT =Op.getSimpleValueType();
3584assert(VT.isFixedLengthVector() &&"Unexpected vector!");
3585
3586MVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
3587
3588SDLocDL(Op);
3589auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
3590
3591MVT XLenVT = Subtarget.getXLenVT();
3592unsigned NumElts =Op.getNumOperands();
3593
3594SDValue DominantValue;
3595unsigned MostCommonCount = 0;
3596DenseMap<SDValue, unsigned> ValueCounts;
3597unsigned NumUndefElts =
3598count_if(Op->op_values(), [](constSDValue &V) { return V.isUndef(); });
3599
3600// Track the number of scalar loads we know we'd be inserting, estimated as
3601// any non-zero floating-point constant. Other kinds of element are either
3602// already in registers or are materialized on demand. The threshold at which
3603// a vector load is more desirable than several scalar materialization and
3604// vector-insertion instructions is not known.
3605unsigned NumScalarLoads = 0;
3606
3607for (SDValue V :Op->op_values()) {
3608if (V.isUndef())
3609continue;
3610
3611unsigned &Count = ValueCounts[V];
3612if (0 == Count)
3613if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3614 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3615
3616// Is this value dominant? In case of a tie, prefer the highest element as
3617// it's cheaper to insert near the beginning of a vector than it is at the
3618// end.
3619if (++Count >= MostCommonCount) {
3620 DominantValue = V;
3621 MostCommonCount = Count;
3622 }
3623 }
3624
3625assert(DominantValue &&"Not expecting an all-undef BUILD_VECTOR");
3626unsigned NumDefElts = NumElts - NumUndefElts;
3627unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3628
3629// Don't perform this optimization when optimizing for size, since
3630// materializing elements and inserting them tends to cause code bloat.
3631if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3632 (NumElts != 2 ||ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3633 ((MostCommonCount > DominantValueCountThreshold) ||
3634 (ValueCounts.size() <=Log2_32(NumDefElts)))) {
3635// Start by splatting the most common element.
3636SDValue Vec = DAG.getSplatBuildVector(VT,DL, DominantValue);
3637
3638DenseSet<SDValue> Processed{DominantValue};
3639
3640// We can handle an insert into the last element (of a splat) via
3641// v(f)slide1down. This is slightly better than the vslideup insert
3642// lowering as it avoids the need for a vector group temporary. It
3643// is also better than using vmerge.vx as it avoids the need to
3644// materialize the mask in a vector register.
3645if (SDValue LastOp =Op->getOperand(Op->getNumOperands() - 1);
3646 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3647 LastOp != DominantValue) {
3648 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3649auto OpCode =
3650 VT.isFloatingPoint() ?RISCVISD::VFSLIDE1DOWN_VL :RISCVISD::VSLIDE1DOWN_VL;
3651if (!VT.isFloatingPoint())
3652 LastOp = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT, LastOp);
3653 Vec = DAG.getNode(OpCode,DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3654 LastOp, Mask, VL);
3655 Vec =convertFromScalableVector(VT, Vec, DAG, Subtarget);
3656 Processed.insert(LastOp);
3657 }
3658
3659MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3660for (constauto &OpIdx :enumerate(Op->ops())) {
3661constSDValue &V = OpIdx.value();
3662if (V.isUndef() || !Processed.insert(V).second)
3663continue;
3664if (ValueCounts[V] == 1) {
3665 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, VT, Vec, V,
3666 DAG.getVectorIdxConstant(OpIdx.index(),DL));
3667 }else {
3668// Blend in all instances of this value using a VSELECT, using a
3669// mask where each bit signals whether that element is the one
3670// we're after.
3671SmallVector<SDValue> Ops;
3672transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3673 return DAG.getConstant(V == V1, DL, XLenVT);
3674 });
3675 Vec = DAG.getNode(ISD::VSELECT,DL, VT,
3676 DAG.getBuildVector(SelMaskTy,DL, Ops),
3677 DAG.getSplatBuildVector(VT,DL, V), Vec);
3678 }
3679 }
3680
3681return Vec;
3682 }
3683
3684returnSDValue();
3685}
3686
3687staticSDValuelowerBuildVectorOfConstants(SDValueOp,SelectionDAG &DAG,
3688constRISCVSubtarget &Subtarget) {
3689MVT VT =Op.getSimpleValueType();
3690assert(VT.isFixedLengthVector() &&"Unexpected vector!");
3691
3692MVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
3693
3694SDLocDL(Op);
3695auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
3696
3697MVT XLenVT = Subtarget.getXLenVT();
3698unsigned NumElts =Op.getNumOperands();
3699
3700if (VT.getVectorElementType() == MVT::i1) {
3701if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3702SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL,DL, ContainerVT, VL);
3703returnconvertFromScalableVector(VT, VMClr, DAG, Subtarget);
3704 }
3705
3706if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3707SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL,DL, ContainerVT, VL);
3708returnconvertFromScalableVector(VT, VMSet, DAG, Subtarget);
3709 }
3710
3711// Lower constant mask BUILD_VECTORs via an integer vector type, in
3712// scalar integer chunks whose bit-width depends on the number of mask
3713// bits and XLEN.
3714// First, determine the most appropriate scalar integer type to use. This
3715// is at most XLenVT, but may be shrunk to a smaller vector element type
3716// according to the size of the final vector - use i8 chunks rather than
3717// XLenVT if we're producing a v8i1. This results in more consistent
3718// codegen across RV32 and RV64.
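// For example, the constant v8i1 mask <1,0,1,1,0,0,0,1> packs into the single
// i8 value 0x8d (element 0 in bit 0), which is built as a v1i8 and bitcast
// back to v8i1.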
3719unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3720 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3721// If we have to use more than one INSERT_VECTOR_ELT then this
3722// optimization is likely to increase code size; avoid performing it in
3723// such a case. We can use a load from a constant pool in this case.
3724if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3725returnSDValue();
3726// Now we can create our integer vector type. Note that it may be larger
3727// than the resulting mask type: v4i1 would use v1i8 as its integer type.
3728unsigned IntegerViaVecElts =divideCeil(NumElts, NumViaIntegerBits);
3729MVT IntegerViaVecVT =
3730MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3731 IntegerViaVecElts);
3732
3733uint64_t Bits = 0;
3734unsigned BitPos = 0, IntegerEltIdx = 0;
3735SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3736
3737for (unsignedI = 0;I < NumElts;) {
3738SDValue V =Op.getOperand(I);
3739bool BitValue = !V.isUndef() && V->getAsZExtVal();
3740 Bits |= ((uint64_t)BitValue << BitPos);
3741 ++BitPos;
3742 ++I;
3743
3744// Once we accumulate enough bits to fill our scalar type or process the
3745// last element, insert into our vector and clear our accumulated data.
3746if (I % NumViaIntegerBits == 0 ||I == NumElts) {
3747if (NumViaIntegerBits <= 32)
3748 Bits = SignExtend64<32>(Bits);
3749SDValue Elt = DAG.getSignedConstant(Bits,DL, XLenVT);
3750 Elts[IntegerEltIdx] = Elt;
3751 Bits = 0;
3752 BitPos = 0;
3753 IntegerEltIdx++;
3754 }
3755 }
3756
3757SDValue Vec = DAG.getBuildVector(IntegerViaVecVT,DL, Elts);
3758
3759if (NumElts < NumViaIntegerBits) {
3760// If we're producing a smaller vector than our minimum legal integer
3761// type, bitcast to the equivalent (known-legal) mask type, and extract
3762// our final mask.
3763assert(IntegerViaVecVT == MVT::v1i8 &&"Unexpected mask vector type");
3764 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3765 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VT, Vec,
3766 DAG.getConstant(0,DL, XLenVT));
3767 }else {
3768// Else we must have produced an integer type with the same size as the
3769// mask type; bitcast for the final result.
3770assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3771 Vec = DAG.getBitcast(VT, Vec);
3772 }
3773
3774return Vec;
3775 }
3776
3777if (SDValueSplat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3778unsigned Opc = VT.isFloatingPoint() ?RISCVISD::VFMV_V_F_VL
3779 :RISCVISD::VMV_V_X_VL;
3780if (!VT.isFloatingPoint())
3781Splat = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT,Splat);
3782Splat =
3783 DAG.getNode(Opc,DL, ContainerVT, DAG.getUNDEF(ContainerVT),Splat, VL);
3784returnconvertFromScalableVector(VT,Splat, DAG, Subtarget);
3785 }
3786
3787// Try and match index sequences, which we can lower to the vid instruction
3788// with optional modifications. An all-undef vector is matched by
3789// getSplatValue, above.
3790if (auto SimpleVID =isSimpleVIDSequence(Op,Op.getScalarValueSizeInBits())) {
3791 int64_t StepNumerator = SimpleVID->StepNumerator;
3792unsigned StepDenominator = SimpleVID->StepDenominator;
3793 int64_t Addend = SimpleVID->Addend;
3794
3795assert(StepNumerator != 0 &&"Invalid step");
3796bool Negate =false;
3797 int64_t SplatStepVal = StepNumerator;
3798unsigned StepOpcode =ISD::MUL;
3799// Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3800// anyway as the shift of 63 won't fit in uimm5.
3801if (StepNumerator != 1 && StepNumerator !=INT64_MIN &&
3802isPowerOf2_64(std::abs(StepNumerator))) {
3803 Negate = StepNumerator < 0;
3804 StepOpcode =ISD::SHL;
3805 SplatStepVal =Log2_64(std::abs(StepNumerator));
3806 }
3807
3808// Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3809// threshold since it's the immediate value many RVV instructions accept.
3810// There is no vmul.vi instruction, so ensure the multiply constant can fit
3811// in a single addi instruction.
3812if (((StepOpcode ==ISD::MUL && isInt<12>(SplatStepVal)) ||
3813 (StepOpcode ==ISD::SHL && isUInt<5>(SplatStepVal))) &&
3814isPowerOf2_32(StepDenominator) &&
3815 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3816MVT VIDVT =
3817 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3818MVT VIDContainerVT =
3819getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3820SDValue VID = DAG.getNode(RISCVISD::VID_VL,DL, VIDContainerVT, Mask, VL);
3821// Convert right out of the scalable type so we can use standard ISD
3822// nodes for the rest of the computation. If we used scalable types with
3823// these, we'd lose the fixed-length vector info and generate worse
3824// vsetvli code.
3825 VID =convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3826if ((StepOpcode ==ISD::MUL && SplatStepVal != 1) ||
3827 (StepOpcode ==ISD::SHL && SplatStepVal != 0)) {
3828SDValue SplatStep = DAG.getSignedConstant(SplatStepVal,DL, VIDVT);
3829 VID = DAG.getNode(StepOpcode,DL, VIDVT, VID, SplatStep);
3830 }
3831if (StepDenominator != 1) {
3832SDValue SplatStep =
3833 DAG.getConstant(Log2_64(StepDenominator),DL, VIDVT);
3834 VID = DAG.getNode(ISD::SRL,DL, VIDVT, VID, SplatStep);
3835 }
3836if (Addend != 0 || Negate) {
3837SDValue SplatAddend = DAG.getSignedConstant(Addend,DL, VIDVT);
3838 VID = DAG.getNode(Negate ?ISD::SUB :ISD::ADD,DL, VIDVT, SplatAddend,
3839 VID);
3840 }
3841if (VT.isFloatingPoint()) {
3842// TODO: Use vfwcvt to reduce register pressure.
3843 VID = DAG.getNode(ISD::SINT_TO_FP,DL, VT, VID);
3844 }
3845return VID;
3846 }
3847 }
3848
3849// For very small build_vectors, use a single scalar insert of a constant.
3850// TODO: Base this on constant rematerialization cost, not size.
3851constunsigned EltBitSize = VT.getScalarSizeInBits();
3852if (VT.getSizeInBits() <= 32 &&
3853ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3854MVT ViaIntVT =MVT::getIntegerVT(VT.getSizeInBits());
3855assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3856"Unexpected sequence type");
3857// If we can use the original VL with the modified element type, this
3858// means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3859// be moved into InsertVSETVLI?
3860unsigned ViaVecLen =
3861 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3862MVT ViaVecVT =MVT::getVectorVT(ViaIntVT, ViaVecLen);
3863
3864uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3865uint64_t SplatValue = 0;
3866// Construct the amalgamated value at this larger vector type.
3867for (constauto &OpIdx :enumerate(Op->op_values())) {
3868constauto &SeqV = OpIdx.value();
3869if (!SeqV.isUndef())
3870 SplatValue |=
3871 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3872 }
3873
3874// On RV64, sign-extend from 32 to 64 bits where possible in order to
3875// achieve better constant materialization.
3876// On RV32, we need to sign-extend to use getSignedConstant.
3877if (ViaIntVT == MVT::i32)
3878 SplatValue = SignExtend64<32>(SplatValue);
3879
3880SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, ViaVecVT,
3881 DAG.getUNDEF(ViaVecVT),
3882 DAG.getSignedConstant(SplatValue,DL, XLenVT),
3883 DAG.getVectorIdxConstant(0,DL));
3884if (ViaVecLen != 1)
3885 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL,
3886MVT::getVectorVT(ViaIntVT, 1), Vec,
3887 DAG.getConstant(0,DL, XLenVT));
3888return DAG.getBitcast(VT, Vec);
3889 }
3890
3891
3892// Attempt to detect "hidden" splats, which only reveal themselves as splats
3893// when re-interpreted as a vector with a larger element type. For example,
3894// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3895// could be instead splat as
3896// v2i32 = build_vector i32 0x00010000, i32 0x00010000
3897// TODO: This optimization could also work on non-constant splats, but it
3898// would require bit-manipulation instructions to construct the splat value.
3899SmallVector<SDValue> Sequence;
3900constauto *BV = cast<BuildVectorSDNode>(Op);
3901if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3902ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3903 BV->getRepeatedSequence(Sequence) &&
3904 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3905unsigned SeqLen = Sequence.size();
3906MVT ViaIntVT =MVT::getIntegerVT(EltBitSize * SeqLen);
3907assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3908 ViaIntVT == MVT::i64) &&
3909"Unexpected sequence type");
3910
3911// If we can use the original VL with the modified element type, this
3912// means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3913// be moved into InsertVSETVLI?
3914constunsigned RequiredVL = NumElts / SeqLen;
3915constunsigned ViaVecLen =
3916 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3917 NumElts : RequiredVL;
3918MVT ViaVecVT =MVT::getVectorVT(ViaIntVT, ViaVecLen);
3919
3920unsigned EltIdx = 0;
3921uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3922uint64_t SplatValue = 0;
3923// Construct the amalgamated value which can be splatted as this larger
3924// vector type.
3925for (constauto &SeqV : Sequence) {
3926if (!SeqV.isUndef())
3927 SplatValue |=
3928 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3929 EltIdx++;
3930 }
3931
3932// On RV64, sign-extend from 32 to 64 bits where possible in order to
3933// achieve better constant materialization.
3934// On RV32, we need to sign-extend to use getSignedConstant.
3935if (ViaIntVT == MVT::i32)
3936 SplatValue = SignExtend64<32>(SplatValue);
3937
3938// Since we can't introduce illegal i64 types at this stage, we can only
3939// perform an i64 splat on RV32 if it is its own sign-extended value. That
3940// way we can use RVV instructions to splat.
3941assert((ViaIntVT.bitsLE(XLenVT) ||
3942 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3943"Unexpected bitcast sequence");
3944if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3945SDValue ViaVL =
3946 DAG.getConstant(ViaVecVT.getVectorNumElements(),DL, XLenVT);
3947MVT ViaContainerVT =
3948getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3949SDValueSplat =
3950 DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ViaContainerVT,
3951 DAG.getUNDEF(ViaContainerVT),
3952 DAG.getSignedConstant(SplatValue,DL, XLenVT), ViaVL);
3953Splat =convertFromScalableVector(ViaVecVT,Splat, DAG, Subtarget);
3954if (ViaVecLen != RequiredVL)
3955Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL,
3956MVT::getVectorVT(ViaIntVT, RequiredVL),Splat,
3957 DAG.getConstant(0,DL, XLenVT));
3958return DAG.getBitcast(VT,Splat);
3959 }
3960 }
3961
3962// If the number of signbits allows, see if we can lower as a <N x i8>.
3963// Our main goal here is to reduce LMUL (and thus work) required to
3964// build the constant, but we will also narrow if the resulting
3965// narrow vector is known to materialize cheaply.
3966// TODO: We really should be costing the smaller vector. There are
3967// profitable cases this misses.
3968if (EltBitSize > 8 && VT.isInteger() &&
3969 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3970 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3971SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3972DL,Op->ops());
3973 Source =convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3974 Source, DAG, Subtarget);
3975SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL,DL, ContainerVT, Source, Mask, VL);
3976returnconvertFromScalableVector(VT, Res, DAG, Subtarget);
3977 }
3978
3979if (SDValue Res =lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3980return Res;
3981
3982// For constant vectors, use generic constant pool lowering. Otherwise,
3983// we'd have to materialize constants in GPRs just to move them into the
3984// vector.
3985returnSDValue();
3986}
3987
3988staticunsignedgetPACKOpcode(unsigned DestBW,
3989constRISCVSubtarget &Subtarget) {
3990switch (DestBW) {
3991default:
3992llvm_unreachable("Unsupported pack size");
3993case 16:
3994return RISCV::PACKH;
3995case 32:
3996return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3997case 64:
3998assert(Subtarget.is64Bit());
3999return RISCV::PACK;
4000 }
4001}
4002
4003/// Double the element size of the build vector to reduce the number
4004/// of vslide1down in the build vector chain. In the worst case, this
4005/// trades three scalar operations for 1 vector operation. Scalar
4006/// operations are generally lower latency, and for out-of-order cores
4007/// we also benefit from additional parallelism.
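/// For example, a v8i16 build_vector becomes a v4i32 build_vector whose
/// elements are formed as (A & 0xffff) | ((B & 0xffff) << 16) (or a single
/// pack[w] when Zbkb is available), halving the number of vslide1down steps,
/// before being bitcast back to v8i16.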
4008staticSDValuelowerBuildVectorViaPacking(SDValueOp,SelectionDAG &DAG,
4009constRISCVSubtarget &Subtarget) {
4010SDLocDL(Op);
4011MVT VT =Op.getSimpleValueType();
4012assert(VT.isFixedLengthVector() &&"Unexpected vector!");
4013MVT ElemVT = VT.getVectorElementType();
4014if (!ElemVT.isInteger())
4015returnSDValue();
4016
4017// TODO: Relax these architectural restrictions, possibly with costing
4018// of the actual instructions required.
4019if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4020returnSDValue();
4021
4022unsigned NumElts = VT.getVectorNumElements();
4023unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4024if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4025 NumElts % 2 != 0)
4026returnSDValue();
4027
4028// Produce [B,A] packed into a type twice as wide. Note that all
4029// scalars are XLenVT, possibly masked (see below).
4030MVT XLenVT = Subtarget.getXLenVT();
4031SDValue Mask = DAG.getConstant(
4032APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits),DL, XLenVT);
4033auto pack = [&](SDValueA,SDValueB) {
4034// Bias the scheduling of the inserted operations to near the
4035// definition of the element - this tends to reduce register
4036// pressure overall.
4037SDLoc ElemDL(B);
4038if (Subtarget.hasStdExtZbkb())
4039// Note that we're relying on the high bits of the result being
4040// don't care. For PACKW, the result is *sign* extended.
4041returnSDValue(
4042 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4043 ElemDL, XLenVT,A,B),
4044 0);
4045
4046A = DAG.getNode(ISD::AND,SDLoc(A), XLenVT,A, Mask);
4047B = DAG.getNode(ISD::AND,SDLoc(B), XLenVT,B, Mask);
4048SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4049return DAG.getNode(ISD::OR, ElemDL, XLenVT,A,
4050 DAG.getNode(ISD::SHL, ElemDL, XLenVT,B, ShtAmt),
4051SDNodeFlags::Disjoint);
4052 };
4053
4054SmallVector<SDValue> NewOperands;
4055 NewOperands.reserve(NumElts / 2);
4056for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4057 NewOperands.push_back(pack(Op.getOperand(i),Op.getOperand(i + 1)));
4058assert(NumElts == NewOperands.size() * 2);
4059MVT WideVT =MVT::getIntegerVT(ElemSizeInBits * 2);
4060MVT WideVecVT =MVT::getVectorVT(WideVT, NumElts / 2);
4061return DAG.getNode(ISD::BITCAST,DL, VT,
4062 DAG.getBuildVector(WideVecVT,DL, NewOperands));
4063}
4064
4065staticSDValuelowerBUILD_VECTOR(SDValueOp,SelectionDAG &DAG,
4066constRISCVSubtarget &Subtarget) {
4067MVT VT =Op.getSimpleValueType();
4068assert(VT.isFixedLengthVector() &&"Unexpected vector!");
4069
4070MVT EltVT = VT.getVectorElementType();
4071MVT XLenVT = Subtarget.getXLenVT();
4072
4073SDLocDL(Op);
4074
4075// Proper support for f16 requires Zvfh. bf16 always requires special
4076// handling. We need to cast the scalar to integer and create an integer
4077// build_vector.
4078if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4079MVT IVT = VT.changeVectorElementType(MVT::i16);
4080SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4081for (unsignedI = 0, E =Op.getNumOperands();I != E; ++I) {
4082SDValue Elem =Op.getOperand(I);
4083if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4084 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4085// Called by LegalizeDAG, we need to use XLenVT operations since we
4086// can't create illegal types.
4087if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4088// Manually constant fold so the integer build_vector can be lowered
4089// better. Waiting for DAGCombine will be too late.
4090APInt V =
4091C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4092 NewOps[I] = DAG.getConstant(V,DL, XLenVT);
4093 }else {
4094 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT, Elem);
4095 }
4096 }else {
4097// Called by scalar type legalizer, we can use i16.
4098 NewOps[I] = DAG.getBitcast(MVT::i16,Op.getOperand(I));
4099 }
4100 }
4101SDValue Res = DAG.getNode(ISD::BUILD_VECTOR,DL, IVT, NewOps);
4102return DAG.getBitcast(VT, Res);
4103 }
4104
4105if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4106ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4107returnlowerBuildVectorOfConstants(Op, DAG, Subtarget);
4108
4109MVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
4110
4111auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
4112
4113if (VT.getVectorElementType() == MVT::i1) {
4114// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4115// vector type, we have a legal equivalently-sized i8 type, so we can use
4116// that.
4117MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4118SDValue VecZero = DAG.getConstant(0,DL, WideVecVT);
4119
4120SDValue WideVec;
4121if (SDValueSplat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4122// For a splat, perform a scalar truncate before creating the wider
4123// vector.
4124Splat = DAG.getNode(ISD::AND,DL,Splat.getValueType(),Splat,
4125 DAG.getConstant(1,DL,Splat.getValueType()));
4126 WideVec = DAG.getSplatBuildVector(WideVecVT,DL,Splat);
4127 }else {
4128SmallVector<SDValue, 8> Ops(Op->op_values());
4129 WideVec = DAG.getBuildVector(WideVecVT,DL, Ops);
4130SDValue VecOne = DAG.getConstant(1,DL, WideVecVT);
4131 WideVec = DAG.getNode(ISD::AND,DL, WideVecVT, WideVec, VecOne);
4132 }
4133
4134return DAG.getSetCC(DL, VT, WideVec, VecZero,ISD::SETNE);
4135 }
4136
4137if (SDValueSplat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4138if (auto Gather =matchSplatAsGather(Splat, VT,DL, DAG, Subtarget))
4139return Gather;
4140unsigned Opc = VT.isFloatingPoint() ?RISCVISD::VFMV_V_F_VL
4141 :RISCVISD::VMV_V_X_VL;
4142if (!VT.isFloatingPoint())
4143Splat = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT,Splat);
4144Splat =
4145 DAG.getNode(Opc,DL, ContainerVT, DAG.getUNDEF(ContainerVT),Splat, VL);
4146returnconvertFromScalableVector(VT,Splat, DAG, Subtarget);
4147 }
4148
4149if (SDValue Res =lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4150return Res;
4151
4152// If we're compiling for an exact VLEN value, we can split our work per
4153// register in the register group.
4154if (constauto VLen = Subtarget.getRealVLen();
4155 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4156MVT ElemVT = VT.getVectorElementType();
4157unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4158EVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
4159MVT OneRegVT =MVT::getVectorVT(ElemVT, ElemsPerVReg);
4160MVT M1VT =getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4161assert(M1VT ==getLMUL1VT(M1VT));
4162
4163// The following semantically builds up a fixed length concat_vector
4164// of the component build_vectors. We eagerly lower to scalable and
4165// insert_subvector here to avoid DAG combining it back to a large
4166// build_vector.
4167SmallVector<SDValue> BuildVectorOps(Op->ops());
4168unsigned NumOpElts = M1VT.getVectorMinNumElements();
4169SDValue Vec = DAG.getUNDEF(ContainerVT);
4170for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4171auto OneVRegOfOps =ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4172SDValue SubBV =
4173 DAG.getNode(ISD::BUILD_VECTOR,DL, OneRegVT, OneVRegOfOps);
4174 SubBV =convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4175unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4176 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ContainerVT, Vec, SubBV,
4177 DAG.getVectorIdxConstant(InsertIdx,DL));
4178 }
4179returnconvertFromScalableVector(VT, Vec, DAG, Subtarget);
4180 }
4181
4182// If we're about to resort to vslide1down (or stack usage), pack our
4183// elements into the widest scalar type we can. This will force a VL/VTYPE
4184// toggle, but reduces the critical path, the number of vslide1down ops
4185// required, and possibly enables scalar folds of the values.
4186if (SDValue Res =lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4187return Res;
4188
4189// For m1 vectors, if we have non-undef values in both halves of our vector,
4190// split the vector into low and high halves, build them separately, then
4191// use a vselect to combine them. For long vectors, this cuts the critical
4192// path of the vslide1down sequence in half, and gives us an opportunity
4193// to special case each half independently. Note that we don't change the
4194// length of the sub-vectors here, so if both fallback to the generic
4195// vslide1down path, we should be able to fold the vselect into the final
4196// vslidedown (for the undef tail) for the first half w/ masking.
4197unsigned NumElts = VT.getVectorNumElements();
4198unsigned NumUndefElts =
4199count_if(Op->op_values(), [](constSDValue &V) { return V.isUndef(); });
4200unsigned NumDefElts = NumElts - NumUndefElts;
4201if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4202 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4203SmallVector<SDValue> SubVecAOps, SubVecBOps;
4204SmallVector<SDValue> MaskVals;
4205SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4206 SubVecAOps.reserve(NumElts);
4207 SubVecBOps.reserve(NumElts);
4208for (unsigned i = 0; i < NumElts; i++) {
4209SDValue Elem =Op->getOperand(i);
4210if (i < NumElts / 2) {
4211 SubVecAOps.push_back(Elem);
4212 SubVecBOps.push_back(UndefElem);
4213 }else {
4214 SubVecAOps.push_back(UndefElem);
4215 SubVecBOps.push_back(Elem);
4216 }
4217bool SelectMaskVal = (i < NumElts / 2);
4218 MaskVals.push_back(DAG.getConstant(SelectMaskVal,DL, XLenVT));
4219 }
4220assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4221 MaskVals.size() == NumElts);
4222
4223SDValue SubVecA = DAG.getBuildVector(VT,DL, SubVecAOps);
4224SDValue SubVecB = DAG.getBuildVector(VT,DL, SubVecBOps);
4225MVT MaskVT =MVT::getVectorVT(MVT::i1, NumElts);
4226SDValue SelectMask = DAG.getBuildVector(MaskVT,DL, MaskVals);
4227return DAG.getNode(ISD::VSELECT,DL, VT, SelectMask, SubVecA, SubVecB);
4228 }
4229
4230// Cap the cost at a value linear to the number of elements in the vector.
4231// The default lowering is to use the stack. The vector store + scalar loads
4232// is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4233// being (at least) linear in LMUL. As a result, using the vslidedown
4234// lowering for every element ends up being VL*LMUL.
4235// TODO: Should we be directly costing the stack alternative? Doing so might
4236// give us a more accurate upper bound.
4237InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4238
4239// TODO: unify with TTI getSlideCost.
4240InstructionCost PerSlideCost = 1;
4241switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4242default:break;
4243caseRISCVII::VLMUL::LMUL_2:
4244 PerSlideCost = 2;
4245break;
4246caseRISCVII::VLMUL::LMUL_4:
4247 PerSlideCost = 4;
4248break;
4249caseRISCVII::VLMUL::LMUL_8:
4250 PerSlideCost = 8;
4251break;
4252 }
4253
4254// TODO: Should we be using the build instseq then cost + evaluate scheme
4255// we use for integer constants here?
4256unsigned UndefCount = 0;
4257for (constSDValue &V :Op->ops()) {
4258if (V.isUndef()) {
4259 UndefCount++;
4260continue;
4261 }
4262if (UndefCount) {
4263 LinearBudget -= PerSlideCost;
4264 UndefCount = 0;
4265 }
4266 LinearBudget -= PerSlideCost;
4267 }
4268if (UndefCount) {
4269 LinearBudget -= PerSlideCost;
4270 }
4271
4272if (LinearBudget < 0)
4273returnSDValue();
4274
4275assert((!VT.isFloatingPoint() ||
4276 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4277"Illegal type which will result in reserved encoding");
4278
4279constunsigned Policy =RISCVII::TAIL_AGNOSTIC |RISCVII::MASK_AGNOSTIC;
4280
4281SDValue Vec;
4282 UndefCount = 0;
4283for (SDValue V :Op->ops()) {
4284if (V.isUndef()) {
4285 UndefCount++;
4286continue;
4287 }
4288
4289// Start our sequence with a TA splat in the hopes that hardware is able to
4290// recognize there's no dependency on the prior value of our temporary
4291// register.
4292if (!Vec) {
4293 Vec = DAG.getSplatVector(VT,DL, V);
4294 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4295 UndefCount = 0;
4296continue;
4297 }
4298
4299if (UndefCount) {
4300constSDValueOffset = DAG.getConstant(UndefCount,DL, Subtarget.getXLenVT());
4301 Vec =getVSlidedown(DAG, Subtarget,DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4302 Vec,Offset, Mask, VL, Policy);
4303 UndefCount = 0;
4304 }
4305auto OpCode =
4306 VT.isFloatingPoint() ?RISCVISD::VFSLIDE1DOWN_VL :RISCVISD::VSLIDE1DOWN_VL;
4307if (!VT.isFloatingPoint())
4308 V = DAG.getNode(ISD::ANY_EXTEND,DL, Subtarget.getXLenVT(), V);
4309 Vec = DAG.getNode(OpCode,DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4310 V, Mask, VL);
4311 }
4312if (UndefCount) {
4313constSDValueOffset = DAG.getConstant(UndefCount,DL, Subtarget.getXLenVT());
4314 Vec =getVSlidedown(DAG, Subtarget,DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4315 Vec,Offset, Mask, VL, Policy);
4316 }
4317returnconvertFromScalableVector(VT, Vec, DAG, Subtarget);
4318}
4319
4320staticSDValuesplatPartsI64WithVL(constSDLoc &DL,MVT VT,SDValue Passthru,
4321SDValueLo,SDValueHi,SDValue VL,
4322SelectionDAG &DAG) {
4323if (!Passthru)
4324 Passthru = DAG.getUNDEF(VT);
4325if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4326 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4327 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4328// If Hi constant is all the same sign bit as Lo, lower this as a custom
4329// node in order to try and match RVV vector/scalar instructions.
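// For example, splatting the i64 constant -1 on RV32 gives LoC == HiC == -1 and
// (LoC >> 31) == HiC, so a single vmv.v.x of Lo suffices because vmv.v.x
// sign-extends the scalar to SEW.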
4330if ((LoC >> 31) == HiC)
4331return DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, Passthru,Lo, VL);
4332
4333// If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4334// we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4335// vlmax vsetvli or vsetivli to change the VL.
4336// FIXME: Support larger constants?
4337// FIXME: Support non-constant VLs by saturating?
4338if (LoC == HiC) {
4339SDValue NewVL;
4340if (isAllOnesConstant(VL) ||
4341 (isa<RegisterSDNode>(VL) &&
4342 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4343 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4344elseif (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4345 NewVL = DAG.getNode(ISD::ADD,DL, VL.getValueType(), VL, VL);
4346
4347if (NewVL) {
4348MVT InterVT =
4349MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4350auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, InterVT,
4351 DAG.getUNDEF(InterVT),Lo, NewVL);
4352return DAG.getNode(ISD::BITCAST,DL, VT, InterVec);
4353 }
4354 }
4355 }
4356
4357// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4358if (Hi.getOpcode() ==ISD::SRA &&Hi.getOperand(0) ==Lo &&
4359 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4360Hi.getConstantOperandVal(1) == 31)
4361return DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, Passthru,Lo, VL);
4362
4363// If the hi bits of the splat are undefined, then it's fine to just splat Lo
4364// even if it might be sign extended.
4365if (Hi.isUndef())
4366return DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, Passthru,Lo, VL);
4367
4368// Fall back to a stack store and stride x0 vector load.
4369return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL,DL, VT, Passthru,Lo,
4370Hi, VL);
4371}
4372
4373// Called by type legalization to handle splat of i64 on RV32.
4374// FIXME: We can optimize this when the type has sign or zero bits in one
4375// of the halves.
4376staticSDValuesplatSplitI64WithVL(constSDLoc &DL,MVT VT,SDValue Passthru,
4377SDValue Scalar,SDValue VL,
4378SelectionDAG &DAG) {
4379assert(Scalar.getValueType() == MVT::i64 &&"Unexpected VT!");
4380SDValueLo,Hi;
4381 std::tie(Lo,Hi) = DAG.SplitScalar(Scalar,DL, MVT::i32, MVT::i32);
4382returnsplatPartsI64WithVL(DL, VT, Passthru,Lo,Hi, VL, DAG);
4383}
4384
4385// This function lowers a splat of a scalar operand Splat with the vector
4386// length VL. It ensures the final sequence is type legal, which is useful when
4387// lowering a splat after type legalization.
4388staticSDValuelowerScalarSplat(SDValue Passthru,SDValue Scalar,SDValue VL,
4389MVT VT,constSDLoc &DL,SelectionDAG &DAG,
4390constRISCVSubtarget &Subtarget) {
4391bool HasPassthru = Passthru && !Passthru.isUndef();
4392if (!HasPassthru && !Passthru)
4393 Passthru = DAG.getUNDEF(VT);
4394
4395MVT EltVT = VT.getVectorElementType();
4396MVT XLenVT = Subtarget.getXLenVT();
4397
4398if (VT.isFloatingPoint()) {
4399if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4400 EltVT == MVT::bf16) {
4401if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4402 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4403 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT, Scalar);
4404else
4405 Scalar = DAG.getNode(ISD::BITCAST,DL, MVT::i16, Scalar);
4406MVT IVT = VT.changeVectorElementType(MVT::i16);
4407 Passthru = DAG.getNode(ISD::BITCAST,DL, IVT, Passthru);
4408SDValueSplat =
4409lowerScalarSplat(Passthru, Scalar, VL, IVT,DL, DAG, Subtarget);
4410return DAG.getNode(ISD::BITCAST,DL, VT,Splat);
4411 }
4412return DAG.getNode(RISCVISD::VFMV_V_F_VL,DL, VT, Passthru, Scalar, VL);
4413 }
4414
4415// Simplest case is that the operand needs to be promoted to XLenVT.
4416if (Scalar.getValueType().bitsLE(XLenVT)) {
4417// If the operand is a constant, sign extend to increase our chances
4418// of being able to use a .vi instruction. ANY_EXTEND would become
4419// a zero extend and the simm5 check in isel would fail.
4420// FIXME: Should we ignore the upper bits in isel instead?
4421unsigned ExtOpc =
4422 isa<ConstantSDNode>(Scalar) ?ISD::SIGN_EXTEND :ISD::ANY_EXTEND;
4423 Scalar = DAG.getNode(ExtOpc,DL, XLenVT, Scalar);
4424return DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, Passthru, Scalar, VL);
4425 }
4426
4427assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4428"Unexpected scalar for splat lowering!");
4429
4430if (isOneConstant(VL) &&isNullConstant(Scalar))
4431return DAG.getNode(RISCVISD::VMV_S_X_VL,DL, VT, Passthru,
4432 DAG.getConstant(0,DL, XLenVT), VL);
4433
4434// Otherwise use the more complicated splatting algorithm.
4435returnsplatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4436}
4437
4438// This function lowers an insert of a scalar operand Scalar into lane
4439// 0 of the vector regardless of the value of VL. The contents of the
4440// remaining lanes of the result vector are unspecified. VL is assumed
4441// to be non-zero.
4442staticSDValuelowerScalarInsert(SDValue Scalar,SDValue VL,MVT VT,
4443constSDLoc &DL,SelectionDAG &DAG,
4444constRISCVSubtarget &Subtarget) {
4445assert(VT.isScalableVector() &&"Expect VT is scalable vector type.");
4446
4447constMVT XLenVT = Subtarget.getXLenVT();
4448SDValue Passthru = DAG.getUNDEF(VT);
4449
4450if (Scalar.getOpcode() ==ISD::EXTRACT_VECTOR_ELT &&
4451isNullConstant(Scalar.getOperand(1))) {
4452SDValue ExtractedVal = Scalar.getOperand(0);
4453// The element types must be the same.
4454if (ExtractedVal.getValueType().getVectorElementType() ==
4455 VT.getVectorElementType()) {
4456MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4457MVT ExtractedContainerVT = ExtractedVT;
4458if (ExtractedContainerVT.isFixedLengthVector()) {
4459 ExtractedContainerVT =getContainerForFixedLengthVector(
4460 DAG, ExtractedContainerVT, Subtarget);
4461 ExtractedVal =convertToScalableVector(ExtractedContainerVT,
4462 ExtractedVal, DAG, Subtarget);
4463 }
4464if (ExtractedContainerVT.bitsLE(VT))
4465return DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VT, Passthru,
4466 ExtractedVal, DAG.getVectorIdxConstant(0,DL));
4467return DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VT, ExtractedVal,
4468 DAG.getVectorIdxConstant(0,DL));
4469 }
4470 }
4471
4472
4473if (VT.isFloatingPoint())
4474return DAG.getNode(RISCVISD::VFMV_S_F_VL,DL, VT,
4475 DAG.getUNDEF(VT), Scalar, VL);
4476
4477// Avoid the tricky legalization cases by falling back to using the
4478// splat code which already handles it gracefully.
4479if (!Scalar.getValueType().bitsLE(XLenVT))
4480returnlowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4481 DAG.getConstant(1,DL, XLenVT),
4482 VT,DL, DAG, Subtarget);
4483
4484// If the operand is a constant, sign extend to increase our chances
4485// of being able to use a .vi instruction. ANY_EXTEND would become
4486// a zero extend and the simm5 check in isel would fail.
4487// FIXME: Should we ignore the upper bits in isel instead?
4488unsigned ExtOpc =
4489 isa<ConstantSDNode>(Scalar) ?ISD::SIGN_EXTEND :ISD::ANY_EXTEND;
4490 Scalar = DAG.getNode(ExtOpc,DL, XLenVT, Scalar);
4491return DAG.getNode(RISCVISD::VMV_S_X_VL,DL, VT, DAG.getUNDEF(VT), Scalar,
4492 VL);
4493}
4494
4495// Can this shuffle be performed on exactly one (possibly larger) input?
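// Either V2 is undef, or V1 and V2 must be the low and high halves extracted
// from a single source vector with twice as many elements; in that case the
// wider source is returned instead.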
4496staticSDValuegetSingleShuffleSrc(MVT VT,MVT ContainerVT,SDValue V1,
4497SDValue V2) {
4498
4499if (V2.isUndef() &&
4500RISCVTargetLowering::getLMUL(ContainerVT) !=RISCVII::VLMUL::LMUL_8)
4501return V1;
4502
4503// Both inputs must be extracts.
4504if (V1.getOpcode() !=ISD::EXTRACT_SUBVECTOR ||
4505 V2.getOpcode() !=ISD::EXTRACT_SUBVECTOR)
4506returnSDValue();
4507
4508// Extracting from the same source.
4509SDValue Src = V1.getOperand(0);
4510if (Src != V2.getOperand(0))
4511returnSDValue();
4512
4513// Src needs to have twice the number of elements.
4514unsigned NumElts = VT.getVectorNumElements();
4515if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4516returnSDValue();
4517
4518// The extracts must extract the two halves of the source.
4519if (V1.getConstantOperandVal(1) != 0 ||
4520 V2.getConstantOperandVal(1) != NumElts)
4521returnSDValue();
4522
4523return Src;
4524}
4525
4526/// Is this shuffle interleaving contiguous elements from one vector into the
4527/// even elements and contiguous elements from another vector into the odd
4528/// elements? \p EvenSrc will contain the element that should be in the first
4529/// even element. \p OddSrc will contain the element that should be in the first
4530/// odd element. These can be the first element in a source or the element half
4531/// way through the source.
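/// For example, for a v8i8 shuffle the mask <0, 8, 1, 9, 2, 10, 3, 11> is an
/// interleave with EvenSrc = 0 and OddSrc = 8, i.e. the low halves of the two
/// source vectors.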
4532staticboolisInterleaveShuffle(ArrayRef<int> Mask,MVT VT,int &EvenSrc,
4533int &OddSrc,constRISCVSubtarget &Subtarget) {
4534// We need to be able to widen elements to the next larger integer type.
4535if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4536returnfalse;
4537
4538intSize = Mask.size();
4539int NumElts = VT.getVectorNumElements();
4540assert(Size == (int)NumElts &&"Unexpected mask size");
4541
4542SmallVector<unsigned, 2> StartIndexes;
4543if (!ShuffleVectorInst::isInterleaveMask(Mask, 2,Size * 2, StartIndexes))
4544returnfalse;
4545
4546 EvenSrc = StartIndexes[0];
4547 OddSrc = StartIndexes[1];
4548
4549// One source should be low half of first vector.
4550if (EvenSrc != 0 && OddSrc != 0)
4551returnfalse;
4552
4553// Subvectors will be extracted from either the start of the two input
4554// vectors, or the start and middle of the first vector if it's a unary
4555// interleave.
4556// In both cases, HalfNumElts will be extracted.
4557// We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4558// we'll create an illegal extract_subvector.
4559// FIXME: We could support other values using a slidedown first.
4560int HalfNumElts = NumElts / 2;
4561return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4562}
4563
4564/// Match shuffles that concatenate two vectors, rotate the concatenation,
4565/// and then extract the original number of elements from the rotated result.
4566/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4567/// returned rotation amount is for a rotate right, where elements move from
4568/// higher elements to lower elements. \p LoSrc indicates the first source
4569/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4570/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4571/// 0 or 1 if a rotation is found.
4572///
4573/// NOTE: We talk about rotate to the right which matches how bit shift and
4574/// rotate instructions are described where LSBs are on the right, but LLVM IR
4575/// and the table below write vectors with the lowest elements on the left.
4576staticintisElementRotate(int &LoSrc,int &HiSrc,ArrayRef<int> Mask) {
4577intSize = Mask.size();
4578
4579// We need to detect various ways of spelling a rotation:
4580// [11, 12, 13, 14, 15, 0, 1, 2]
4581// [-1, 12, 13, 14, -1, -1, 1, -1]
4582// [-1, -1, -1, -1, -1, -1, 1, 2]
4583// [ 3, 4, 5, 6, 7, 8, 9, 10]
4584// [-1, 4, 5, 6, -1, -1, 9, -1]
4585// [-1, 4, 5, 6, -1, -1, -1, -1]
4586int Rotation = 0;
4587 LoSrc = -1;
4588 HiSrc = -1;
4589for (int i = 0; i !=Size; ++i) {
4590int M = Mask[i];
4591if (M < 0)
4592continue;
4593
4594// Determine where a rotate vector would have started.
4595int StartIdx = i - (M %Size);
4596// The identity rotation isn't interesting, stop.
4597if (StartIdx == 0)
4598return -1;
4599
4600// If we found the tail of a vector the rotation must be the missing
4601// front. If we found the head of a vector, it must be how much of the
4602// head.
4603int CandidateRotation = StartIdx < 0 ? -StartIdx :Size - StartIdx;
4604
4605if (Rotation == 0)
4606 Rotation = CandidateRotation;
4607elseif (Rotation != CandidateRotation)
4608// The rotations don't match, so we can't match this mask.
4609return -1;
4610
4611// Compute which value this mask is pointing at.
4612int MaskSrc = M <Size ? 0 : 1;
4613
4614// Compute which of the two target values this index should be assigned to.
4615// This reflects whether the high elements are remaining or the low elements
4616// are remaining.
4617int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4618
4619// Either set up this value if we've not encountered it before, or check
4620// that it remains consistent.
4621if (TargetSrc < 0)
4622 TargetSrc = MaskSrc;
4623elseif (TargetSrc != MaskSrc)
4624// This may be a rotation, but it pulls from the inputs in some
4625// unsupported interleaving.
4626return -1;
4627 }
4628
4629// Check that we successfully analyzed the mask, and normalize the results.
4630assert(Rotation != 0 &&"Failed to locate a viable rotation!");
4631assert((LoSrc >= 0 || HiSrc >= 0) &&
4632"Failed to find a rotated input vector!");
4633
4634return Rotation;
4635}
4636
4637// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4638// 2, 4, or 8, and the integer type Factor-times larger than VT's
4639// element type must be a legal element type.
4640// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4641// -> [p, q, r, s] (Factor=2, Index=1)
4642staticSDValuegetDeinterleaveShiftAndTrunc(constSDLoc &DL,MVT VT,
4643SDValue Src,unsigned Factor,
4644unsigned Index,SelectionDAG &DAG) {
4645unsigned EltBits = VT.getScalarSizeInBits();
4646ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4647MVT WideSrcVT =MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4648 SrcEC.divideCoefficientBy(Factor));
4649MVT ResVT =MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4650 SrcEC.divideCoefficientBy(Factor));
4651 Src = DAG.getBitcast(WideSrcVT, Src);
4652
4653unsigned Shift = Index * EltBits;
4654SDValue Res = DAG.getNode(ISD::SRL,DL, WideSrcVT, Src,
4655 DAG.getConstant(Shift,DL, WideSrcVT));
4656 Res = DAG.getNode(ISD::TRUNCATE,DL, ResVT, Res);
4657MVT IntVT = VT.changeVectorElementTypeToInteger();
4658 Res = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, IntVT, DAG.getUNDEF(IntVT), Res,
4659 DAG.getVectorIdxConstant(0,DL));
4660return DAG.getBitcast(VT, Res);
4661}
4662
4663// Lower the following shuffle to vslidedown.
4664// a)
4665// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4666// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4667// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4668// b)
4669// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4670// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4671// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4672// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4673// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4674// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4675staticSDValuelowerVECTOR_SHUFFLEAsVSlidedown(constSDLoc &DL,MVT VT,
4676SDValue V1,SDValue V2,
4677ArrayRef<int> Mask,
4678constRISCVSubtarget &Subtarget,
4679SelectionDAG &DAG) {
4680auto findNonEXTRACT_SUBVECTORParent =
4681 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4682uint64_tOffset = 0;
4683while (Parent.getOpcode() ==ISD::EXTRACT_SUBVECTOR &&
4684// EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4685// a scalable vector. But we don't want to match that case.
4686 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4687Offset += Parent.getConstantOperandVal(1);
4688 Parent = Parent.getOperand(0);
4689 }
4690return std::make_pair(Parent,Offset);
4691 };
4692
4693auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4694auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4695
4696// Extracting from the same source.
4697SDValue Src = V1Src;
4698if (Src != V2Src)
4699returnSDValue();
4700
4701// Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4702SmallVector<int, 16> NewMask(Mask);
4703for (size_t i = 0; i != NewMask.size(); ++i) {
4704if (NewMask[i] == -1)
4705continue;
4706
4707if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4708 NewMask[i] = NewMask[i] + V1IndexOffset;
4709 }else {
4710// Minus NewMask.size() is needed. Otherwise, case b) above would be
4711// <5,6,7,12> instead of <5,6,7,8>.
4712 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4713 }
4714 }
4715
4716// First index must be known and non-zero. It will be used as the slidedown
4717// amount.
4718if (NewMask[0] <= 0)
4719returnSDValue();
4720
4721// NewMask must also be contiguous (consecutive indices).
4722for (unsigned i = 1; i != NewMask.size(); ++i)
4723if (NewMask[i - 1] + 1 != NewMask[i])
4724returnSDValue();
4725
4726MVT XLenVT = Subtarget.getXLenVT();
4727MVT SrcVT = Src.getSimpleValueType();
4728MVT ContainerVT =getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4729auto [TrueMask, VL] =getDefaultVLOps(SrcVT, ContainerVT,DL, DAG, Subtarget);
4730SDValue Slidedown =
4731getVSlidedown(DAG, Subtarget,DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4732convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4733 DAG.getConstant(NewMask[0],DL, XLenVT), TrueMask, VL);
4734return DAG.getNode(
4735ISD::EXTRACT_SUBVECTOR,DL, VT,
4736convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4737 DAG.getConstant(0,DL, XLenVT));
4738}
4739
4740// Because vslideup leaves the destination elements at the start intact, we can
4741// use it to perform shuffles that insert subvectors:
4742//
4743// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4744// ->
4745// vsetvli zero, 8, e8, mf2, ta, ma
4746// vslideup.vi v8, v9, 4
4747//
4748// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4749// ->
4750// vsetvli zero, 5, e8, mf2, tu, ma
4751// vslideup.vi v8, v9, 2
4752staticSDValuelowerVECTOR_SHUFFLEAsVSlideup(constSDLoc &DL,MVT VT,
4753SDValue V1,SDValue V2,
4754ArrayRef<int> Mask,
4755constRISCVSubtarget &Subtarget,
4756SelectionDAG &DAG) {
4757unsigned NumElts = VT.getVectorNumElements();
4758int NumSubElts, Index;
4759if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4760 Index))
4761returnSDValue();
4762
4763bool OpsSwapped = Mask[Index] < (int)NumElts;
4764SDValue InPlace = OpsSwapped ? V2 : V1;
4765SDValue ToInsert = OpsSwapped ? V1 : V2;
4766
4767MVT XLenVT = Subtarget.getXLenVT();
4768MVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
4769auto TrueMask =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).first;
4770// We slide up by the index that the subvector is being inserted at, and set
4771// VL to the index + the number of elements being inserted.
4772unsigned Policy =RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED |RISCVII::MASK_AGNOSTIC;
4773// If we're adding a suffix to the in-place vector, i.e. inserting right
4774// up to the very end of it, then we don't actually care about the tail.
4775if (NumSubElts + Index >= (int)NumElts)
4776 Policy |=RISCVII::TAIL_AGNOSTIC;
4777
4778 InPlace =convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4779 ToInsert =convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4780SDValue VL = DAG.getConstant(NumSubElts + Index,DL, XLenVT);
4781
4782SDValue Res;
4783// If we're inserting into the lowest elements, use a tail undisturbed
4784// vmv.v.v.
4785if (Index == 0)
4786 Res = DAG.getNode(RISCVISD::VMV_V_V_VL,DL, ContainerVT, InPlace, ToInsert,
4787 VL);
4788else
4789 Res =getVSlideup(DAG, Subtarget,DL, ContainerVT, InPlace, ToInsert,
4790 DAG.getConstant(Index,DL, XLenVT), TrueMask, VL, Policy);
4791returnconvertFromScalableVector(VT, Res, DAG, Subtarget);
4792}
4793
4794/// Match v(f)slide1up/down idioms. These operations involve sliding
4795/// N-1 elements to make room for an inserted scalar at one end.
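/// For example, vector_shuffle V, Splat, <4, 0, 1, 2> keeps the first three
/// elements of V shifted up by one and inserts the splatted scalar at element
/// 0, i.e. a vslide1up.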
4796staticSDValuelowerVECTOR_SHUFFLEAsVSlide1(constSDLoc &DL,MVT VT,
4797SDValue V1,SDValue V2,
4798ArrayRef<int> Mask,
4799constRISCVSubtarget &Subtarget,
4800SelectionDAG &DAG) {
4801bool OpsSwapped =false;
4802if (!isa<BuildVectorSDNode>(V1)) {
4803if (!isa<BuildVectorSDNode>(V2))
4804returnSDValue();
4805std::swap(V1, V2);
4806 OpsSwapped =true;
4807 }
4808SDValueSplat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4809if (!Splat)
4810returnSDValue();
4811
4812// Return true if the mask could describe a slide of Mask.size() - 1
4813// elements from concat_vector(V1, V2)[Base:] to [Offset:].
4814auto isSlideMask = [](ArrayRef<int> Mask,unsignedBase,intOffset) {
4815constunsigned S = (Offset > 0) ? 0 : -Offset;
4816constunsigned E = Mask.size() - ((Offset > 0) ?Offset : 0);
4817for (unsigned i = S; i != E; ++i)
4818if (Mask[i] >= 0 && (unsigned)Mask[i] !=Base + i +Offset)
4819returnfalse;
4820returntrue;
4821 };
4822
4823constunsigned NumElts = VT.getVectorNumElements();
4824bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4825if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4826returnSDValue();
4827
4828constint InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4829// The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4830if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4831returnSDValue();
4832
4833MVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
4834auto [TrueMask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
4835
4836// zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4837// vslide1{down,up}.vx instead.
4838if (VT.getVectorElementType() == MVT::bf16 ||
4839 (VT.getVectorElementType() == MVT::f16 &&
4840 !Subtarget.hasVInstructionsF16())) {
4841MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4842Splat =
4843 DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, Subtarget.getXLenVT(),Splat);
4844 V2 = DAG.getBitcast(
4845 IntVT,convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4846SDValue Vec = DAG.getNode(
4847 IsVSlidedown ?RISCVISD::VSLIDE1DOWN_VL :RISCVISD::VSLIDE1UP_VL,DL,
4848 IntVT, DAG.getUNDEF(IntVT), V2,Splat, TrueMask, VL);
4849 Vec = DAG.getBitcast(ContainerVT, Vec);
4850returnconvertFromScalableVector(VT, Vec, DAG, Subtarget);
4851 }
4852
4853auto OpCode = IsVSlidedown ?
4854 (VT.isFloatingPoint() ?RISCVISD::VFSLIDE1DOWN_VL :RISCVISD::VSLIDE1DOWN_VL) :
4855 (VT.isFloatingPoint() ?RISCVISD::VFSLIDE1UP_VL :RISCVISD::VSLIDE1UP_VL);
4856if (!VT.isFloatingPoint())
4857Splat = DAG.getNode(ISD::ANY_EXTEND,DL, Subtarget.getXLenVT(),Splat);
4858auto Vec = DAG.getNode(OpCode,DL, ContainerVT,
4859 DAG.getUNDEF(ContainerVT),
4860convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4861Splat, TrueMask, VL);
4862returnconvertFromScalableVector(VT, Vec, DAG, Subtarget);
4863}
4864
4865// Match a mask which "spreads" the leading elements of a vector evenly
4866// across the result. Factor is the spread amount, and Index is the
4867// offset applied (on success, Index < Factor). This is the inverse
4868// of a deinterleave with the same Factor and Index. This is analogous
4869// to an interleave, except that all but one lane is undef.
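// For example, <0, -1, -1, -1, 1, -1, -1, -1> is a spread mask with Factor=4
// and Index=0, and <-1, 0, -1, -1, -1, 1, -1, -1> is the same with Index=1.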
4870staticboolisSpreadMask(ArrayRef<int> Mask,unsigned Factor,unsigned &Index) {
4871SmallVector<bool> LaneIsUndef(Factor,true);
4872for (unsigned i = 0; i < Mask.size(); i++)
4873 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4874
4875bool Found =false;
4876for (unsigned i = 0; i < Factor; i++) {
4877if (LaneIsUndef[i])
4878continue;
4879if (Found)
4880returnfalse;
4881 Index = i;
4882 Found =true;
4883 }
4884if (!Found)
4885returnfalse;
4886
4887for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4888unsigned j = i * Factor + Index;
4889if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4890returnfalse;
4891 }
4892returntrue;
4893}
4894
4895// Given a vector a, b, c, d return a vector Factor times longer
4896// with Factor-1 undefs between elements. Ex:
4897// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4898// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4899staticSDValuegetWideningSpread(SDValue V,unsigned Factor,unsigned Index,
4900constSDLoc &DL,SelectionDAG &DAG) {
4901
4902MVT VT = V.getSimpleValueType();
4903unsigned EltBits = VT.getScalarSizeInBits();
4904ElementCount EC = VT.getVectorElementCount();
4905 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4906
4907MVT WideVT =MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4908
4909SDValue Result = DAG.getNode(ISD::ZERO_EXTEND,DL, WideVT, V);
4910// TODO: On rv32, the constant becomes a splat_vector_parts which does not
4911// allow the SHL to fold away if Index is 0.
4912if (Index != 0)
4913 Result = DAG.getNode(ISD::SHL,DL, WideVT, Result,
4914 DAG.getConstant(EltBits * Index,DL, WideVT));
4915// Make sure to use original element type
4916MVT ResultVT =MVT::getVectorVT(VT.getVectorElementType(),
4917 EC.multiplyCoefficientBy(Factor));
4918return DAG.getBitcast(ResultVT, Result);
4919}
4920
4921// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4922// to create an interleaved vector of <[vscale x] n*2 x ty>.
4923// This requires that the size of ty is less than the subtarget's maximum ELEN.
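// For example, EvenV = [a, b] and OddV = [x, y] produce [a, x, b, y].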
4924staticSDValuegetWideningInterleave(SDValue EvenV,SDValue OddV,
4925constSDLoc &DL,SelectionDAG &DAG,
4926constRISCVSubtarget &Subtarget) {
4927
4928// FIXME: Not only does this optimize the code, it fixes some correctness
4929// issues because MIR does not have freeze.
4930if (EvenV.isUndef())
4931returngetWideningSpread(OddV, 2, 1,DL, DAG);
4932if (OddV.isUndef())
4933returngetWideningSpread(EvenV, 2, 0,DL, DAG);
4934
4935MVT VecVT = EvenV.getSimpleValueType();
4936MVT VecContainerVT = VecVT;// <vscale x n x ty>
4937// Convert fixed vectors to scalable if needed
4938if (VecContainerVT.isFixedLengthVector()) {
4939 VecContainerVT =getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4940 EvenV =convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4941 OddV =convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4942 }
4943
4944assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4945
4946// We're working with a vector of the same size as the resulting
4947// interleaved vector, but with half the number of elements and
4948// twice the SEW (hence the restriction on not using the maximum
4949// ELEN).
4950MVT WideVT =
4951MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4952 VecVT.getVectorElementCount());
4953MVT WideContainerVT = WideVT;// <vscale x n x ty*2>
4954if (WideContainerVT.isFixedLengthVector())
4955 WideContainerVT =getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4956
4957// Bitcast the input vectors to integers in case they are FP
4958 VecContainerVT = VecContainerVT.changeTypeToInteger();
4959 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4960 OddV = DAG.getBitcast(VecContainerVT, OddV);
4961
4962auto [Mask, VL] =getDefaultVLOps(VecVT, VecContainerVT,DL, DAG, Subtarget);
4963SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4964
4965SDValue Interleaved;
4966if (Subtarget.hasStdExtZvbb()) {
4967// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4968SDValue OffsetVec =
4969 DAG.getConstant(VecVT.getScalarSizeInBits(),DL, VecContainerVT);
4970 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL,DL, WideContainerVT, OddV,
4971 OffsetVec, Passthru, Mask, VL);
4972 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL,DL, WideContainerVT,
4973 Interleaved, EvenV, Passthru, Mask, VL);
4974 }else {
4975// FIXME: We should freeze the odd vector here. We already handled the case
4976// of provably undef/poison above.
4977
4978// Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4979// vwaddu.vv
4980 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL,DL, WideContainerVT, EvenV,
4981 OddV, Passthru, Mask, VL);
4982
4983// Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV * all-ones
4984SDValue AllOnesVec = DAG.getSplatVector(
4985 VecContainerVT,DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4986SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL,DL, WideContainerVT,
4987 OddV, AllOnesVec, Passthru, Mask, VL);
4988
4989// Add the two together so we get
4990// (OddV * 0xff...ff) + (OddV + EvenV)
4991// = (OddV * 0x100...00) + EvenV
4992// = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4993// Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4994 Interleaved = DAG.getNode(RISCVISD::ADD_VL,DL, WideContainerVT,
4995 Interleaved, OddsMul, Passthru, Mask, VL);
4996 }
4997
4998// Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4999MVT ResultContainerVT =MVT::getVectorVT(
5000 VecVT.getVectorElementType(),// Make sure to use original type
5001 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5002 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5003
5004// Convert back to a fixed vector if needed
5005MVT ResultVT =
5006MVT::getVectorVT(VecVT.getVectorElementType(),
5007 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5008if (ResultVT.isFixedLengthVector())
5009 Interleaved =
5010convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5011
5012return Interleaved;
5013}
5014
5015// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5016// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5017staticSDValuelowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5018SelectionDAG &DAG,
5019constRISCVSubtarget &Subtarget) {
5020SDLocDL(SVN);
5021MVT VT = SVN->getSimpleValueType(0);
5022SDValue V = SVN->getOperand(0);
5023unsigned NumElts = VT.getVectorNumElements();
5024
5025assert(VT.getVectorElementType() == MVT::i1);
5026
5027if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5028 SVN->getMask().size()) ||
5029 !SVN->getOperand(1).isUndef())
5030returnSDValue();
5031
5032unsigned ViaEltSize = std::max((uint64_t)8,PowerOf2Ceil(NumElts));
5033EVT ViaVT =EVT::getVectorVT(
5034 *DAG.getContext(),EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5035EVT ViaBitVT =
5036EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5037
5038// If we don't have zvbb or the larger element type > ELEN, the operation will
5039// be illegal.
5040if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5041 ViaVT) ||
5042 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5043returnSDValue();
5044
5045// If the bit vector doesn't fit exactly into the larger element type, we need
5046// to insert it into the larger vector and then shift up the reversed bits
5047// afterwards to get rid of the gap introduced.
5048if (ViaEltSize > NumElts)
5049 V = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5050 V, DAG.getVectorIdxConstant(0,DL));
5051
5052SDValue Res =
5053 DAG.getNode(ISD::BITREVERSE,DL, ViaVT, DAG.getBitcast(ViaVT, V));
5054
5055// Shift up the reversed bits if the vector didn't exactly fit into the larger
5056// element type.
5057if (ViaEltSize > NumElts)
5058 Res = DAG.getNode(ISD::SRL,DL, ViaVT, Res,
5059 DAG.getConstant(ViaEltSize - NumElts,DL, ViaVT));
5060
5061 Res = DAG.getBitcast(ViaBitVT, Res);
5062
5063if (ViaEltSize > NumElts)
5064 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VT, Res,
5065 DAG.getVectorIdxConstant(0,DL));
5066return Res;
5067}
5068
5069staticboolisLegalBitRotate(ShuffleVectorSDNode *SVN,
5070SelectionDAG &DAG,
5071constRISCVSubtarget &Subtarget,
5072MVT &RotateVT,unsigned &RotateAmt) {
5073SDLocDL(SVN);
5074
5075EVT VT = SVN->getValueType(0);
5076unsigned NumElts = VT.getVectorNumElements();
5077unsigned EltSizeInBits = VT.getScalarSizeInBits();
5078unsigned NumSubElts;
5079if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5080 NumElts, NumSubElts, RotateAmt))
5081returnfalse;
5082 RotateVT =MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5083 NumElts / NumSubElts);
5084
5085// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5086return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5087}
5088
5089// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5090// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5091// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5092staticSDValuelowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5093SelectionDAG &DAG,
5094constRISCVSubtarget &Subtarget) {
5095SDLocDL(SVN);
5096
5097EVT VT = SVN->getValueType(0);
5098unsigned RotateAmt;
5099MVT RotateVT;
5100if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5101returnSDValue();
5102
5103SDValueOp = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5104
5105SDValue Rotate;
5106// A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5107// so canonicalize to vrev8.
5108if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5109 Rotate = DAG.getNode(ISD::BSWAP,DL, RotateVT,Op);
5110else
5111 Rotate = DAG.getNode(ISD::ROTL,DL, RotateVT,Op,
5112 DAG.getConstant(RotateAmt,DL, RotateVT));
5113
5114return DAG.getBitcast(VT, Rotate);
5115}
5116
5117// If compiling with an exactly known VLEN, see if we can split a
5118// shuffle on m2 or larger into a small number of m1 sized shuffles
5119// which write each destination register exactly once.
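// For example, with a known VLEN of 128, a v32i8 shuffle (LMUL=2) can be
// rebuilt from v16i8 (LMUL=1) shuffles of the extracted source registers,
// reassembled with INSERT_SUBVECTOR.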
5120staticSDValuelowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5121SelectionDAG &DAG,
5122constRISCVSubtarget &Subtarget) {
5123SDLocDL(SVN);
5124MVT VT = SVN->getSimpleValueType(0);
5125SDValue V1 = SVN->getOperand(0);
5126SDValue V2 = SVN->getOperand(1);
5127ArrayRef<int> Mask = SVN->getMask();
5128
5129// If we don't know the exact data layout, there's not much we can do. If this
5130// is already m1 or smaller, no point in splitting further.
5131constauto VLen = Subtarget.getRealVLen();
5132if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5133returnSDValue();
5134
5135// Avoid picking up bitrotate patterns which we have a linear-in-lmul
5136// expansion for.
5137unsigned RotateAmt;
5138MVT RotateVT;
5139if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5140returnSDValue();
5141
5142MVT ElemVT = VT.getVectorElementType();
5143unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5144
5145EVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
5146MVT OneRegVT =MVT::getVectorVT(ElemVT, ElemsPerVReg);
5147MVT M1VT =getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5148assert(M1VT ==getLMUL1VT(M1VT));
5149unsigned NumOpElts = M1VT.getVectorMinNumElements();
5150unsigned NumElts = ContainerVT.getVectorMinNumElements();
5151unsigned NumOfSrcRegs = NumElts / NumOpElts;
5152unsigned NumOfDestRegs = NumElts / NumOpElts;
5153// The following semantically builds up a fixed length concat_vector
5154// of the component shuffle_vectors. We eagerly lower to scalable here
5155// to avoid DAG combining it back to a large shuffle_vector again.
5156 V1 =convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5157 V2 =convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5158SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5159Operands;
5160processShuffleMasks(
5161 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5162 [&]() {Operands.emplace_back(); },
5163 [&](ArrayRef<int> SrcSubMask,unsigned SrcVecIdx,unsigned DstVecIdx) {
5164Operands.emplace_back().emplace_back(
5165 SrcVecIdx, UINT_MAX,
5166SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5167 },
5168 [&](ArrayRef<int> SrcSubMask,unsigned Idx1,unsigned Idx2,bool NewReg) {
5169if (NewReg)
5170Operands.emplace_back();
5171Operands.back().emplace_back(
5172 Idx1, Idx2,SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5173 });
5174assert(Operands.size() == NumOfDestRegs &&"Whole vector must be processed");
5175// Note: check that we do not emit too many shuffles here to prevent code
5176// size explosion.
5177// TODO: investigate if it can be improved by extra analysis of the masks to
5178// check if the code is more profitable.
5179unsigned NumShuffles = std::accumulate(
5180Operands.begin(),Operands.end(), 0u,
5181 [&](unsignedN,
5182ArrayRef<std::tuple<unsigned,unsigned,SmallVector<int>>>Data) {
5183 if (Data.empty())
5184 return N;
5185 N += Data.size();
5186 for (const auto &P : Data) {
5187 unsigned Idx2 = std::get<1>(P);
5188 ArrayRef<int> Mask = std::get<2>(P);
5189 if (Idx2 != UINT_MAX)
5190 ++N;
5191 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5192 --N;
5193 }
5194returnN;
5195 });
5196if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5197 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5198returnSDValue();
5199auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec,unsigned ExtractIdx) {
5200SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, M1VT, SrcVec,
5201 DAG.getVectorIdxConstant(ExtractIdx,DL));
5202 SubVec =convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5203return SubVec;
5204 };
5205auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1,SDValue SubVec2,
5206ArrayRef<int>Mask) {
5207SDValue SubVec = DAG.getVectorShuffle(OneRegVT,DL, SubVec1, SubVec2, Mask);
5208return SubVec;
5209 };
5210SDValue Vec = DAG.getUNDEF(ContainerVT);
5211for (auto [I,Data] :enumerate(Operands)) {
5212if (Data.empty())
5213continue;
5214SmallDenseMap<unsigned, SDValue, 4> Values;
5215for (unsignedI : seq<unsigned>(Data.size())) {
5216constauto &[Idx1, Idx2,_] =Data[I];
5217if (Values.contains(Idx1)) {
5218assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5219"Expected both indices to be extracted already.");
5220break;
5221 }
5222SDValueV = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5223 (Idx1 % NumOfSrcRegs) * NumOpElts);
5224 Values[Idx1] =V;
5225if (Idx2 != UINT_MAX)
5226 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5227 (Idx2 % NumOfSrcRegs) * NumOpElts);
5228 }
5229SDValueV;
5230for (constauto &[Idx1, Idx2, Mask] :Data) {
5231SDValue V1 = Values.at(Idx1);
5232SDValueV2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5233V = PerformShuffle(V1, V2, Mask);
5234 Values[Idx1] =V;
5235 }
5236
5237unsigned InsertIdx =I * NumOpElts;
5238V =convertToScalableVector(M1VT, V, DAG, Subtarget);
5239 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ContainerVT, Vec, V,
5240 DAG.getVectorIdxConstant(InsertIdx,DL));
5241 }
5242returnconvertFromScalableVector(VT, Vec, DAG, Subtarget);
5243}
5244
5245// Matches a subset of compress masks with a contiguous prefix of output
5246// elements. This could be extended to allow gaps by deciding which
5247// source elements to spuriously demand.
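// For example, <0, 2, 3, 6, -1, -1, -1, -1> is a compress mask: it packs
// source elements 0, 2, 3 and 6 into the first four result elements.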
5248staticboolisCompressMask(ArrayRef<int> Mask) {
5249intLast = -1;
5250bool SawUndef =false;
5251for (unsigned i = 0; i < Mask.size(); i++) {
5252if (Mask[i] == -1) {
5253 SawUndef =true;
5254continue;
5255 }
5256if (SawUndef)
5257returnfalse;
5258if (i > (unsigned)Mask[i])
5259returnfalse;
5260if (Mask[i] <=Last)
5261returnfalse;
5262Last = Mask[i];
5263 }
5264returntrue;
5265}
5266
5267/// Given a shuffle where the indices are disjoint between the two sources,
5268/// e.g.:
5269///
5270/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5271///
5272/// Merge the two sources into one and do a single source shuffle:
5273///
5274/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5275/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5276///
5277/// A vselect will either be merged into a masked instruction or be lowered as a
5278/// vmerge.vvm, which is cheaper than a vrgather.vv.
5279staticSDValuelowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5280SelectionDAG &DAG,
5281constRISCVSubtarget &Subtarget) {
5282MVT VT = SVN->getSimpleValueType(0);
5283MVT XLenVT = Subtarget.getXLenVT();
5284SDLocDL(SVN);
5285
5286constArrayRef<int> Mask = SVN->getMask();
5287
5288// Work out which source each lane will come from.
5289SmallVector<int, 16> Srcs(Mask.size(), -1);
5290
5291for (intIdx : Mask) {
5292if (Idx == -1)
5293continue;
5294unsigned SrcIdx =Idx % Mask.size();
5295int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5296if (Srcs[SrcIdx] == -1)
5297// Mark this source as using this lane.
5298 Srcs[SrcIdx] = Src;
5299elseif (Srcs[SrcIdx] != Src)
5300// The other source is using this lane: not disjoint.
5301returnSDValue();
5302 }
5303
5304SmallVector<SDValue> SelectMaskVals;
5305for (int Lane : Srcs) {
5306if (Lane == -1)
5307 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5308else
5309 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1,DL, XLenVT));
5310 }
5311MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5312SDValue SelectMask = DAG.getBuildVector(MaskVT,DL, SelectMaskVals);
5313SDValueSelect = DAG.getNode(ISD::VSELECT,DL, VT, SelectMask,
5314 SVN->getOperand(0), SVN->getOperand(1));
5315
5316// Move all indices relative to the first source.
5317SmallVector<int> NewMask(Mask.size());
5318for (unsignedI = 0;I < Mask.size();I++) {
5319if (Mask[I] == -1)
5320 NewMask[I] = -1;
5321else
5322 NewMask[I] = Mask[I] % Mask.size();
5323 }
5324
5325return DAG.getVectorShuffle(VT,DL,Select, DAG.getUNDEF(VT), NewMask);
5326}
5327
5328/// Try to widen element type to get a new mask value for a better permutation
5329/// sequence. This doesn't try to inspect the widened mask for profitability;
5330/// we speculate the widened form is equal or better. This has the effect of
5331/// reducing mask constant sizes - allowing cheaper materialization sequences
5332/// - and index sequence sizes - reducing register pressure and materialization
5333/// cost, at the cost of (possibly) an extra VTYPE toggle.
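/// For example, a v8i8 shuffle with mask <2, 3, 0, 1, 6, 7, 4, 5> can instead
/// be performed as a v4i16 shuffle with mask <1, 0, 3, 2>.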
5334staticSDValuetryWidenMaskForShuffle(SDValueOp,SelectionDAG &DAG) {
5335SDLocDL(Op);
5336MVT VT =Op.getSimpleValueType();
5337MVT ScalarVT = VT.getVectorElementType();
5338unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5339SDValue V0 =Op.getOperand(0);
5340SDValue V1 =Op.getOperand(1);
5341ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5342
5343// Avoid wasted work leading to isTypeLegal check failing below
5344if (ElementSize > 32)
5345returnSDValue();
5346
5347SmallVector<int, 8> NewMask;
5348if (!widenShuffleMaskElts(Mask, NewMask))
5349returnSDValue();
5350
5351MVT NewEltVT = VT.isFloatingPoint() ?MVT::getFloatingPointVT(ElementSize * 2)
5352 :MVT::getIntegerVT(ElementSize * 2);
5353MVT NewVT =MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5354if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5355returnSDValue();
5356 V0 = DAG.getBitcast(NewVT, V0);
5357 V1 = DAG.getBitcast(NewVT, V1);
5358return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT,DL, V0, V1, NewMask));
5359}
5360
5361staticSDValuelowerVECTOR_SHUFFLE(SDValueOp,SelectionDAG &DAG,
5362constRISCVSubtarget &Subtarget) {
5363SDValue V1 =Op.getOperand(0);
5364SDValue V2 =Op.getOperand(1);
5365SDLocDL(Op);
5366MVT XLenVT = Subtarget.getXLenVT();
5367MVT VT =Op.getSimpleValueType();
5368unsigned NumElts = VT.getVectorNumElements();
5369ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5370
5371if (VT.getVectorElementType() == MVT::i1) {
5372// Lower to a vror.vi of a larger element type if possible before we promote
5373// i1s to i8s.
5374if (SDValue V =lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5375return V;
5376if (SDValue V =lowerBitreverseShuffle(SVN, DAG, Subtarget))
5377return V;
5378
5379// Promote i1 shuffle to i8 shuffle.
5380MVT WidenVT =MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5381 V1 = DAG.getNode(ISD::ZERO_EXTEND,DL, WidenVT, V1);
5382 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5383 : DAG.getNode(ISD::ZERO_EXTEND,DL, WidenVT, V2);
5384SDValue Shuffled = DAG.getVectorShuffle(WidenVT,DL, V1, V2, SVN->getMask());
5385return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0,DL, WidenVT),
5386ISD::SETNE);
5387 }
5388
5389MVT ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
5390
5391auto [TrueMask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
5392
5393if (SVN->isSplat()) {
5394constint Lane = SVN->getSplatIndex();
5395if (Lane >= 0) {
5396MVT SVT = VT.getVectorElementType();
5397
5398// Turn splatted vector load into a strided load with an X0 stride.
5399SDValue V = V1;
5400// Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5401// with undef.
5402// FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5403intOffset = Lane;
5404if (V.getOpcode() ==ISD::CONCAT_VECTORS) {
5405int OpElements =
5406 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5407 V = V.getOperand(Offset / OpElements);
5408Offset %= OpElements;
5409 }
5410
5411// We need to ensure the load isn't atomic or volatile.
5412if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5413auto *Ld = cast<LoadSDNode>(V);
5414Offset *= SVT.getStoreSize();
5415SDValue NewAddr = DAG.getMemBasePlusOffset(
5416 Ld->getBasePtr(),TypeSize::getFixed(Offset),DL);
5417
5418// If this is SEW=64 on RV32, use a strided load with a stride of x0.
5419if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5420SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5421SDValue IntID =
5422 DAG.getTargetConstant(Intrinsic::riscv_vlse,DL, XLenVT);
5423SDValue Ops[] = {Ld->getChain(),
5424 IntID,
5425 DAG.getUNDEF(ContainerVT),
5426 NewAddr,
5427 DAG.getRegister(RISCV::X0, XLenVT),
5428 VL};
5429SDValue NewLoad = DAG.getMemIntrinsicNode(
5430ISD::INTRINSIC_W_CHAIN,DL, VTs, Ops, SVT,
5431 DAG.getMachineFunction().getMachineMemOperand(
5432 Ld->getMemOperand(),Offset, SVT.getStoreSize()));
5433 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5434returnconvertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5435 }
5436
5437MVT SplatVT = ContainerVT;
5438
5439// f16 with zvfhmin and bf16 need to use an integer scalar load.
5440if (SVT == MVT::bf16 ||
5441 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5442 SVT = MVT::i16;
5443 SplatVT = ContainerVT.changeVectorElementType(SVT);
5444 }
5445
5446// Otherwise use a scalar load and splat. This will give the best
5447// opportunity to fold a splat into the operation. ISel can turn it into
5448// the x0 strided load if we aren't able to fold away the select.
5449if (SVT.isFloatingPoint())
5450 V = DAG.getLoad(SVT,DL, Ld->getChain(), NewAddr,
5451 Ld->getPointerInfo().getWithOffset(Offset),
5452 Ld->getOriginalAlign(),
5453 Ld->getMemOperand()->getFlags());
5454else
5455 V = DAG.getExtLoad(ISD::EXTLOAD,DL, XLenVT, Ld->getChain(), NewAddr,
5456 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5457 Ld->getOriginalAlign(),
5458 Ld->getMemOperand()->getFlags());
5459 DAG.makeEquivalentMemoryOrdering(Ld, V);
5460
5461unsigned Opc = SplatVT.isFloatingPoint() ?RISCVISD::VFMV_V_F_VL
5462 :RISCVISD::VMV_V_X_VL;
5463SDValueSplat =
5464 DAG.getNode(Opc,DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5465Splat = DAG.getBitcast(ContainerVT,Splat);
5466returnconvertFromScalableVector(VT,Splat, DAG, Subtarget);
5467 }
5468
5469 V1 =convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5470assert(Lane < (int)NumElts &&"Unexpected lane!");
5471SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL,DL, ContainerVT,
5472 V1, DAG.getConstant(Lane,DL, XLenVT),
5473 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5474returnconvertFromScalableVector(VT, Gather, DAG, Subtarget);
5475 }
5476 }
5477
5478// For exact VLEN m2 or greater, try to split to m1 operations if we
5479// can split cleanly.
5480if (SDValue V =lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5481return V;
5482
5483ArrayRef<int> Mask = SVN->getMask();
5484
5485if (SDValue V =
5486lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5487return V;
5488
5489if (SDValue V =
5490lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5491return V;
5492
5493// A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5494// available.
5495if (Subtarget.hasStdExtZvkb())
5496if (SDValue V =lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5497return V;
5498
5499// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5500// be undef which can be handled with a single SLIDEDOWN/UP.
5501int LoSrc, HiSrc;
5502int Rotation =isElementRotate(LoSrc, HiSrc, Mask);
5503if (Rotation > 0) {
5504SDValue LoV, HiV;
5505if (LoSrc >= 0) {
5506 LoV = LoSrc == 0 ? V1 : V2;
5507 LoV =convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5508 }
5509if (HiSrc >= 0) {
5510 HiV = HiSrc == 0 ? V1 : V2;
5511 HiV =convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5512 }
5513
5514// We found a rotation. We need to slide HiV down by Rotation. Then we need
5515// to slide LoV up by (NumElts - Rotation).
5516unsigned InvRotate = NumElts - Rotation;
5517
5518SDValue Res = DAG.getUNDEF(ContainerVT);
5519if (HiV) {
5520// Even though we could use a smaller VL, don't, to avoid a vsetivli
5521// toggle.
5522 Res =getVSlidedown(DAG, Subtarget,DL, ContainerVT, Res, HiV,
5523 DAG.getConstant(Rotation,DL, XLenVT), TrueMask, VL);
5524 }
5525if (LoV)
5526 Res =getVSlideup(DAG, Subtarget,DL, ContainerVT, Res, LoV,
5527 DAG.getConstant(InvRotate,DL, XLenVT), TrueMask, VL,
5528RISCVII::TAIL_AGNOSTIC);
5529
5530returnconvertFromScalableVector(VT, Res, DAG, Subtarget);
5531 }
5532
5533if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5534return DAG.getNode(ISD::VECTOR_REVERSE,DL, VT, V1);
5535
5536// If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5537// use shift and truncate to perform the shuffle.
5538// TODO: For Factor=6, we can perform the first step of the deinterleave via
5539// shift-and-trunc reducing total cost for everything except an mf8 result.
5540// TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5541// to do the entire operation.
5542if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5543constunsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5544assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5545for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5546unsigned Index = 0;
5547if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5548 1 <count_if(Mask, [](intIdx) {returnIdx != -1; })) {
5549if (SDValue Src =getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5550returngetDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5551 }
5552 }
5553 }
5554
5555if (SDValue V =
5556lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5557return V;
5558
5559// Detect an interleave shuffle and lower to
5560// (vwmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5561int EvenSrc, OddSrc;
5562if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5563// Extract the halves of the vectors.
5564MVT HalfVT = VT.getHalfNumVectorElementsVT();
5565
5566// Recognize if one half is actually undef; the matching above will
5567// otherwise reuse the even stream for the undef one. This improves
5568// spread(2) shuffles.
5569bool LaneIsUndef[2] = {true,true};
5570for (unsigned i = 0; i < Mask.size(); i++)
5571 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5572
5573intSize = Mask.size();
5574SDValue EvenV, OddV;
5575if (LaneIsUndef[0]) {
5576 EvenV = DAG.getUNDEF(HalfVT);
5577 }else {
5578assert(EvenSrc >= 0 &&"Undef source?");
5579 EvenV = (EvenSrc /Size) == 0 ? V1 : V2;
5580 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, HalfVT, EvenV,
5581 DAG.getVectorIdxConstant(EvenSrc %Size,DL));
5582 }
5583
5584if (LaneIsUndef[1]) {
5585 OddV = DAG.getUNDEF(HalfVT);
5586 }else {
5587assert(OddSrc >= 0 &&"Undef source?");
5588 OddV = (OddSrc /Size) == 0 ? V1 : V2;
5589 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, HalfVT, OddV,
5590 DAG.getVectorIdxConstant(OddSrc %Size,DL));
5591 }
5592
5593returngetWideningInterleave(EvenV, OddV,DL, DAG, Subtarget);
5594 }
5595
5596
5597// Handle any remaining single source shuffles
5598assert(!V1.isUndef() &&"Unexpected shuffle canonicalization");
5599if (V2.isUndef()) {
5600// We might be able to express the shuffle as a bitrotate. But even if we
5601// don't have Zvkb and have to expand, the expanded sequence of approx. 2
5602// shifts and a vor will have a higher throughput than a vrgather.
5603if (SDValue V =lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5604return V;
5605
5606// Before hitting generic lowering fallbacks, try to widen the mask
5607// to a wider SEW.
5608if (SDValue V =tryWidenMaskForShuffle(Op, DAG))
5609return V;
5610
5611// Can we generate a vcompress instead of a vrgather? These scale better
5612// at high LMUL, at the cost of not being able to fold a following select
5613// into them. The mask constants are also smaller than the index vector
5614// constants, and thus easier to materialize.
5615if (isCompressMask(Mask)) {
5616SmallVector<SDValue> MaskVals(NumElts,
5617 DAG.getConstant(false,DL, XLenVT));
5618for (autoIdx : Mask) {
5619if (Idx == -1)
5620break;
5621assert(Idx >= 0 && (unsigned)Idx < NumElts);
5622 MaskVals[Idx] = DAG.getConstant(true,DL, XLenVT);
5623 }
5624MVT MaskVT =MVT::getVectorVT(MVT::i1, NumElts);
5625SDValue CompressMask = DAG.getBuildVector(MaskVT,DL, MaskVals);
5626return DAG.getNode(ISD::VECTOR_COMPRESS,DL, VT, V1, CompressMask,
5627 DAG.getUNDEF(VT));
5628 }
5629
5630// Match a spread(4,8) which can be done via extend and shift. Spread(2)
5631// is fully covered in interleave(2) above, so it is ignored here.
5632if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5633unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5634assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5635for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5636unsigned Index;
5637if (isSpreadMask(Mask, Factor, Index)) {
5638MVT NarrowVT =
5639MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5640SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, NarrowVT, V1,
5641 DAG.getVectorIdxConstant(0,DL));
5642returngetWideningSpread(Src, Factor, Index,DL, DAG);
5643 }
5644 }
5645 }
5646
5647if (VT.getScalarSizeInBits() == 8 &&
5648any_of(Mask, [&](constauto &Idx) {returnIdx > 255; })) {
5649// On such a vector we're unable to use i8 as the index type.
5650// FIXME: We could promote the index to i16 and use vrgatherei16, but that
5651// may involve vector splitting if we're already at LMUL=8, or our
5652// user-supplied maximum fixed-length LMUL.
5653returnSDValue();
5654 }
5655
5656// Base case for the two operand recursion below - handle the worst case
5657// single source shuffle.
5658unsigned GatherVVOpc =RISCVISD::VRGATHER_VV_VL;
5659MVT IndexVT = VT.changeTypeToInteger();
5660// Since we can't introduce illegal index types at this stage, use i16 and
5661// vrgatherei16 if the corresponding index type for plain vrgather is greater
5662// than XLenVT.
5663if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5664 GatherVVOpc =RISCVISD::VRGATHEREI16_VV_VL;
5665 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5666 }
5667
5668// If the mask allows, we can do all the index computation in 16 bits. This
5669// requires less work and less register pressure at high LMUL, and creates
5670// smaller constants which may be cheaper to materialize.
5671if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5672 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5673 GatherVVOpc =RISCVISD::VRGATHEREI16_VV_VL;
5674 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5675 }
5676
5677MVT IndexContainerVT =
5678 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5679
5680 V1 =convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5681SmallVector<SDValue> GatherIndicesLHS;
5682for (int MaskIndex : Mask) {
5683bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5684 GatherIndicesLHS.push_back(IsLHSIndex
5685 ? DAG.getConstant(MaskIndex,DL, XLenVT)
5686 : DAG.getUNDEF(XLenVT));
5687 }
5688SDValue LHSIndices = DAG.getBuildVector(IndexVT,DL, GatherIndicesLHS);
5689 LHSIndices =convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5690 Subtarget);
5691SDValue Gather = DAG.getNode(GatherVVOpc,DL, ContainerVT, V1, LHSIndices,
5692 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5693returnconvertFromScalableVector(VT, Gather, DAG, Subtarget);
5694 }
5695
5696// As a backup, shuffles can be lowered via a vrgather instruction, possibly
5697// merged with a second vrgather.
5698SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5699
5700// Now construct the mask that will be used by the blended vrgather operation.
5701// Construct the appropriate indices into each vector.
5702for (int MaskIndex : Mask) {
5703bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5704 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5705 ? MaskIndex : -1);
5706 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5707 }
5708
5709// If the mask indices are disjoint between the two sources, we can lower it
5710// as a vselect + a single source vrgather.vv. Don't do this if we think the
5711// operands may end up being lowered to something cheaper than a vrgather.vv.
5712if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5713 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5714 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5715 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5716 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5717if (SDValue V =lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5718return V;
5719
5720// Before hitting generic lowering fallbacks, try to widen the mask
5721// to a wider SEW.
5722if (SDValue V =tryWidenMaskForShuffle(Op, DAG))
5723return V;
5724
5725// Try to pick a profitable operand order.
5726bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5727 SwapOps = SwapOps ^ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5728
5729// Recursively invoke lowering for each operand if we had two
5730// independent single source shuffles, and then combine the result via a
5731// vselect. Note that the vselect will likely be folded back into the
5732// second permute (vrgather, or other) by the post-isel combine.
5733 V1 = DAG.getVectorShuffle(VT,DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5734 V2 = DAG.getVectorShuffle(VT,DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5735
5736SmallVector<SDValue> MaskVals;
5737for (int MaskIndex : Mask) {
5738bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5739 MaskVals.push_back(DAG.getConstant(SelectMaskVal,DL, XLenVT));
5740 }
5741
5742assert(MaskVals.size() == NumElts &&"Unexpected select-like shuffle");
5743MVT MaskVT =MVT::getVectorVT(MVT::i1, NumElts);
5744SDValue SelectMask = DAG.getBuildVector(MaskVT,DL, MaskVals);
5745
5746if (SwapOps)
5747return DAG.getNode(ISD::VSELECT,DL, VT, SelectMask, V1, V2);
5748return DAG.getNode(ISD::VSELECT,DL, VT, SelectMask, V2, V1);
5749}
5750
5751boolRISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M,EVT VT) const{
5752// Only support legal VTs for other shuffles for now.
5753if (!isTypeLegal(VT))
5754returnfalse;
5755
5756// Support splats for any type. These should type legalize well.
5757if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5758returntrue;
5759
5760MVT SVT = VT.getSimpleVT();
5761
5762// Not for i1 vectors.
5763if (SVT.getScalarType() == MVT::i1)
5764returnfalse;
5765
5766int Dummy1, Dummy2;
5767return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5768isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5769}
5770
5771// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5772// the exponent.
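// For example, for an i32 element with value 8: (float)8.0 has a biased
// exponent of 130, so log2 = 130 - 127 = 3. That directly gives cttz = 3
// (after isolating the lowest set bit with x & -x), and
// ctlz = (127 + 31) - 130 = 28.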
5773SDValue
5774RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValueOp,
5775SelectionDAG &DAG) const{
5776MVT VT =Op.getSimpleValueType();
5777unsigned EltSize = VT.getScalarSizeInBits();
5778SDValue Src =Op.getOperand(0);
5779SDLocDL(Op);
5780MVT ContainerVT = VT;
5781
5782SDValue Mask, VL;
5783if (Op->isVPOpcode()) {
5784 Mask =Op.getOperand(1);
5785if (VT.isFixedLengthVector())
5786 Mask =convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5787 Subtarget);
5788 VL =Op.getOperand(2);
5789 }
5790
5791// We choose an FP type that can represent the value if possible. Otherwise, we
5792// use a round-towards-zero conversion to get the correct exponent in the result.
5793// TODO: Use f16 for i8 when possible?
5794MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5795if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5796 FloatEltVT = MVT::f32;
5797MVT FloatVT =MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5798
5799// Legal types should have been checked in the RISCVTargetLowering
5800// constructor.
5801// TODO: Splitting may make sense in some cases.
5802assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5803"Expected legal float type!");
5804
5805// For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5806// The trailing zero count is equal to log2 of this single bit value.
5807if (Op.getOpcode() ==ISD::CTTZ_ZERO_UNDEF) {
5808SDValue Neg = DAG.getNegative(Src,DL, VT);
5809 Src = DAG.getNode(ISD::AND,DL, VT, Src, Neg);
5810 }elseif (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5811SDValue Neg = DAG.getNode(ISD::VP_SUB,DL, VT, DAG.getConstant(0,DL, VT),
5812 Src, Mask, VL);
5813 Src = DAG.getNode(ISD::VP_AND,DL, VT, Src, Neg, Mask, VL);
5814 }
5815
5816// We have a legal FP type, convert to it.
5817SDValue FloatVal;
5818if (FloatVT.bitsGT(VT)) {
5819if (Op->isVPOpcode())
5820 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP,DL, FloatVT, Src, Mask, VL);
5821else
5822 FloatVal = DAG.getNode(ISD::UINT_TO_FP,DL, FloatVT, Src);
5823 }else {
5824// Use RTZ to avoid rounding influencing exponent of FloatVal.
5825if (VT.isFixedLengthVector()) {
5826 ContainerVT =getContainerForFixedLengthVector(VT);
5827 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5828 }
5829if (!Op->isVPOpcode())
5830 std::tie(Mask, VL) =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
5831SDValue RTZRM =
5832 DAG.getTargetConstant(RISCVFPRndMode::RTZ,DL, Subtarget.getXLenVT());
5833MVT ContainerFloatVT =
5834MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5835 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL,DL, ContainerFloatVT,
5836 Src, Mask, RTZRM, VL);
5837if (VT.isFixedLengthVector())
5838 FloatVal =convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5839 }
5840// Bitcast to integer and shift the exponent to the LSB.
5841EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5842SDValueBitcast = DAG.getBitcast(IntVT, FloatVal);
5843unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5844
5845SDValueExp;
5846// Restore to the original type. The truncate after the SRL is there so a vnsrl is generated.
5847if (Op->isVPOpcode()) {
5848Exp = DAG.getNode(ISD::VP_SRL,DL, IntVT, Bitcast,
5849 DAG.getConstant(ShiftAmt,DL, IntVT), Mask, VL);
5850Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5851 }else {
5852Exp = DAG.getNode(ISD::SRL,DL, IntVT, Bitcast,
5853 DAG.getConstant(ShiftAmt,DL, IntVT));
5854if (IntVT.bitsLT(VT))
5855Exp = DAG.getNode(ISD::ZERO_EXTEND,DL, VT, Exp);
5856elseif (IntVT.bitsGT(VT))
5857Exp = DAG.getNode(ISD::TRUNCATE,DL, VT, Exp);
5858 }
5859
5860// The exponent contains log2 of the value in biased form.
5861unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5862// For trailing zeros, we just need to subtract the bias.
5863if (Op.getOpcode() ==ISD::CTTZ_ZERO_UNDEF)
5864return DAG.getNode(ISD::SUB,DL, VT, Exp,
5865 DAG.getConstant(ExponentBias,DL, VT));
5866if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5867return DAG.getNode(ISD::VP_SUB,DL, VT, Exp,
5868 DAG.getConstant(ExponentBias,DL, VT), Mask, VL);
5869
5870// For leading zeros, we need to remove the bias and convert from log2 to
5871// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5872unsigned Adjust = ExponentBias + (EltSize - 1);
5873SDValue Res;
5874if (Op->isVPOpcode())
5875 Res = DAG.getNode(ISD::VP_SUB,DL, VT, DAG.getConstant(Adjust,DL, VT),Exp,
5876Mask, VL);
5877else
5878 Res = DAG.getNode(ISD::SUB,DL, VT, DAG.getConstant(Adjust,DL, VT),Exp);
5879
5880// For a zero input the result above equals Adjust, which is greater than
5881// EltSize, so we can clamp with min(Res, EltSize) for CTLZ.
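// Illustrative: a zero input converts to +0.0, whose exponent field is 0, so
// Res equals Adjust, which exceeds EltSize; the UMIN below then clamps it to
// EltSize, the defined result of a plain CTLZ on zero.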
5882if (Op.getOpcode() ==ISD::CTLZ)
5883 Res = DAG.getNode(ISD::UMIN,DL, VT, Res, DAG.getConstant(EltSize,DL, VT));
5884elseif (Op.getOpcode() == ISD::VP_CTLZ)
5885 Res = DAG.getNode(ISD::VP_UMIN,DL, VT, Res,
5886 DAG.getConstant(EltSize,DL, VT),Mask, VL);
5887return Res;
5888}
5889
5890SDValue RISCVTargetLowering::lowerVPCttzElements(SDValueOp,
5891SelectionDAG &DAG) const{
5892SDLocDL(Op);
5893MVT XLenVT = Subtarget.getXLenVT();
5894SDValueSource =Op->getOperand(0);
5895MVT SrcVT =Source.getSimpleValueType();
5896SDValueMask =Op->getOperand(1);
5897SDValue EVL =Op->getOperand(2);
5898
5899if (SrcVT.isFixedLengthVector()) {
5900MVT ContainerVT =getContainerForFixedLengthVector(SrcVT);
5901Source =convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5902Mask =convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5903 Subtarget);
5904 SrcVT = ContainerVT;
5905 }
5906
5907// Convert to boolean vector.
5908if (SrcVT.getScalarType() != MVT::i1) {
5909SDValue AllZero = DAG.getConstant(0,DL, SrcVT);
5910 SrcVT =MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5911Source = DAG.getNode(RISCVISD::SETCC_VL,DL, SrcVT,
5912 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5913 DAG.getUNDEF(SrcVT), Mask, EVL});
5914 }
5915
5916SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL,DL, XLenVT, Source, Mask, EVL);
5917if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5918// In this case, we can interpret poison as -1, so there is nothing further to do.
5919return Res;
5920
5921// Convert -1 to VL.
5922SDValue SetCC =
5923 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0,DL, XLenVT),ISD::SETLT);
5924 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5925return DAG.getNode(ISD::TRUNCATE,DL,Op.getValueType(), Res);
5926}
5927
5928// While RVV has alignment restrictions, we should always be able to load as a
5929// legal equivalently-sized byte-typed vector instead. This method is
5930// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5931// the load is already correctly aligned, it returns SDValue().
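// For example (illustrative): a byte-aligned load of nxv2i32 is re-expressed
// as an nxv8i8 load with the original alignment, and the result is bitcast
// back to nxv2i32.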
5932SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValueOp,
5933SelectionDAG &DAG) const{
5934auto *Load = cast<LoadSDNode>(Op);
5935assert(Load &&Load->getMemoryVT().isVector() &&"Expected vector load");
5936
5937if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5938Load->getMemoryVT(),
5939 *Load->getMemOperand()))
5940returnSDValue();
5941
5942SDLocDL(Op);
5943MVT VT =Op.getSimpleValueType();
5944unsigned EltSizeBits = VT.getScalarSizeInBits();
5945assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5946"Unexpected unaligned RVV load type");
5947MVT NewVT =
5948MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5949assert(NewVT.isValid() &&
5950"Expecting equally-sized RVV vector types to be legal");
5951SDValueL = DAG.getLoad(NewVT,DL,Load->getChain(),Load->getBasePtr(),
5952Load->getPointerInfo(),Load->getOriginalAlign(),
5953Load->getMemOperand()->getFlags());
5954return DAG.getMergeValues({DAG.getBitcast(VT, L),L.getValue(1)},DL);
5955}
5956
5957// While RVV has alignment restrictions, we should always be able to store as a
5958// legal equivalently-sized byte-typed vector instead. This method is
5959// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5960// returns SDValue() if the store is already correctly aligned.
5961SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValueOp,
5962SelectionDAG &DAG) const{
5963auto *Store = cast<StoreSDNode>(Op);
5964assert(Store &&Store->getValue().getValueType().isVector() &&
5965"Expected vector store");
5966
5967if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5968Store->getMemoryVT(),
5969 *Store->getMemOperand()))
5970returnSDValue();
5971
5972SDLocDL(Op);
5973SDValue StoredVal =Store->getValue();
5974MVT VT = StoredVal.getSimpleValueType();
5975unsigned EltSizeBits = VT.getScalarSizeInBits();
5976assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5977"Unexpected unaligned RVV store type");
5978MVT NewVT =
5979MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5980assert(NewVT.isValid() &&
5981"Expecting equally-sized RVV vector types to be legal");
5982 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5983return DAG.getStore(Store->getChain(),DL, StoredVal,Store->getBasePtr(),
5984Store->getPointerInfo(),Store->getOriginalAlign(),
5985Store->getMemOperand()->getFlags());
5986}
5987
5988staticSDValuelowerConstant(SDValueOp,SelectionDAG &DAG,
5989constRISCVSubtarget &Subtarget) {
5990assert(Op.getValueType() == MVT::i64 &&"Unexpected VT");
5991
5992 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5993
5994// All simm32 constants should be handled by isel.
5995// NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5996// this check redundant, but small immediates are common, so checking here
5997// first gives better compile time.
5998if (isInt<32>(Imm))
5999returnOp;
6000
6001// We only need to cost the immediate, if constant pool lowering is enabled.
6002if (!Subtarget.useConstantPoolForLargeInts())
6003returnOp;
6004
6005RISCVMatInt::InstSeq Seq =RISCVMatInt::generateInstSeq(Imm, Subtarget);
6006if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6007returnOp;
6008
6009// Optimizations below are disabled for opt size. If we're optimizing for
6010// size, use a constant pool.
6011if (DAG.shouldOptForSize())
6012returnSDValue();
6013
6014// Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6015// do that if it will avoid a constant pool, though it requires an extra
6016// temporary register.
6017// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6018// the low and high 32 bits are the same and bits 31 and 63 are set.
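// Illustrative example (assumed constant, not from the original comments):
// for Imm = 0x8000000080000000 both halves are 0x80000000, so with Zba we can
// materialize X with a single LUI and form
// (ADD_UW X, (SLLI X, 32)) = (X << 32) + zext(X[31:0]) = Imm.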
6019unsigned ShiftAmt, AddOpc;
6020RISCVMatInt::InstSeq SeqLo =
6021RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6022if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6023returnOp;
6024
6025returnSDValue();
6026}
6027
6028SDValue RISCVTargetLowering::lowerConstantFP(SDValueOp,
6029SelectionDAG &DAG) const{
6030MVT VT =Op.getSimpleValueType();
6031constAPFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6032
6033// Can this constant be selected by a Zfa FLI instruction?
6034bool Negate =false;
6035intIndex =getLegalZfaFPImm(Imm, VT);
6036
6037// If the constant is negative, try negating.
6038if (Index < 0 &&Imm.isNegative()) {
6039Index =getLegalZfaFPImm(-Imm, VT);
6040 Negate =true;
6041 }
6042
6043// If we couldn't find a FLI lowering, fall back to generic code.
6044if (Index < 0)
6045returnSDValue();
6046
6047// Emit an FLI+FNEG. We use a custom node to hide it from constant folding.
6048SDLocDL(Op);
6049SDValueConst =
6050 DAG.getNode(RISCVISD::FLI,DL, VT,
6051 DAG.getTargetConstant(Index,DL, Subtarget.getXLenVT()));
6052if (!Negate)
6053returnConst;
6054
6055return DAG.getNode(ISD::FNEG,DL, VT, Const);
6056}
6057
6058staticSDValueLowerATOMIC_FENCE(SDValueOp,SelectionDAG &DAG,
6059constRISCVSubtarget &Subtarget) {
6060SDLoc dl(Op);
6061AtomicOrdering FenceOrdering =
6062static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6063SyncScope::ID FenceSSID =
6064static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6065
6066if (Subtarget.hasStdExtZtso()) {
6067// The only fence that needs an instruction is a sequentially-consistent
6068// cross-thread fence.
6069if (FenceOrdering ==AtomicOrdering::SequentiallyConsistent &&
6070 FenceSSID ==SyncScope::System)
6071returnOp;
6072
6073// MEMBARRIER is a compiler barrier; it codegens to a no-op.
6074return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other,Op.getOperand(0));
6075 }
6076
6077// SingleThread fences only synchronize with signal handlers on the same
6078// thread and thus only need to preserve instruction order, not actually
6079// enforce memory ordering.
6080if (FenceSSID ==SyncScope::SingleThread)
6081// MEMBARRIER is a compiler barrier; it codegens to a no-op.
6082return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other,Op.getOperand(0));
6083
6084returnOp;
6085}
6086
6087SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValueOp,
6088SelectionDAG &DAG) const{
6089SDLocDL(Op);
6090MVT VT =Op.getSimpleValueType();
6091MVT XLenVT = Subtarget.getXLenVT();
6092unsignedCheck =Op.getConstantOperandVal(1);
6093unsigned TDCMask = 0;
6094if (Check &fcSNan)
6095 TDCMask |=RISCV::FPMASK_Signaling_NaN;
6096if (Check &fcQNan)
6097 TDCMask |=RISCV::FPMASK_Quiet_NaN;
6098if (Check &fcPosInf)
6099 TDCMask |=RISCV::FPMASK_Positive_Infinity;
6100if (Check &fcNegInf)
6101 TDCMask |=RISCV::FPMASK_Negative_Infinity;
6102if (Check &fcPosNormal)
6103 TDCMask |=RISCV::FPMASK_Positive_Normal;
6104if (Check &fcNegNormal)
6105 TDCMask |=RISCV::FPMASK_Negative_Normal;
6106if (Check &fcPosSubnormal)
6107 TDCMask |=RISCV::FPMASK_Positive_Subnormal;
6108if (Check &fcNegSubnormal)
6109 TDCMask |=RISCV::FPMASK_Negative_Subnormal;
6110if (Check &fcPosZero)
6111 TDCMask |=RISCV::FPMASK_Positive_Zero;
6112if (Check &fcNegZero)
6113 TDCMask |=RISCV::FPMASK_Negative_Zero;
6114
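// Illustrative: a query for any NaN (fcSNan | fcQNan) builds
// TDCMask = FPMASK_Signaling_NaN | FPMASK_Quiet_NaN; the scalar path below
// emits an FCLASS, ANDs its result with this mask, and compares against zero.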
6115bool IsOneBitMask =isPowerOf2_32(TDCMask);
6116
6117SDValue TDCMaskV = DAG.getConstant(TDCMask,DL, XLenVT);
6118
6119if (VT.isVector()) {
6120SDValue Op0 =Op.getOperand(0);
6121MVT VT0 =Op.getOperand(0).getSimpleValueType();
6122
6123if (VT.isScalableVector()) {
6124MVT DstVT = VT0.changeVectorElementTypeToInteger();
6125auto [Mask, VL] =getDefaultScalableVLOps(VT0,DL, DAG, Subtarget);
6126if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6127Mask =Op.getOperand(2);
6128 VL =Op.getOperand(3);
6129 }
6130SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL,DL, DstVT, Op0, Mask,
6131 VL,Op->getFlags());
6132if (IsOneBitMask)
6133return DAG.getSetCC(DL, VT, FPCLASS,
6134 DAG.getConstant(TDCMask,DL, DstVT),
6135ISD::CondCode::SETEQ);
6136SDValueAND = DAG.getNode(ISD::AND,DL, DstVT, FPCLASS,
6137 DAG.getConstant(TDCMask,DL, DstVT));
6138return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0,DL, DstVT),
6139ISD::SETNE);
6140 }
6141
6142MVT ContainerVT0 =getContainerForFixedLengthVector(VT0);
6143MVT ContainerVT =getContainerForFixedLengthVector(VT);
6144MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6145auto [Mask, VL] =getDefaultVLOps(VT0, ContainerVT0,DL, DAG, Subtarget);
6146if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6147Mask =Op.getOperand(2);
6148MVT MaskContainerVT =
6149getContainerForFixedLengthVector(Mask.getSimpleValueType());
6150Mask =convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6151 VL =Op.getOperand(3);
6152 }
6153 Op0 =convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6154
6155SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL,DL, ContainerDstVT, Op0,
6156 Mask, VL,Op->getFlags());
6157
6158 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerDstVT,
6159 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6160if (IsOneBitMask) {
6161SDValue VMSEQ =
6162 DAG.getNode(RISCVISD::SETCC_VL,DL, ContainerVT,
6163 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6164 DAG.getUNDEF(ContainerVT), Mask, VL});
6165returnconvertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6166 }
6167SDValueAND = DAG.getNode(RISCVISD::AND_VL,DL, ContainerDstVT, FPCLASS,
6168 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6169
6170SDValue SplatZero = DAG.getConstant(0,DL, XLenVT);
6171 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerDstVT,
6172 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6173
6174SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL,DL, ContainerVT,
6175 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6176 DAG.getUNDEF(ContainerVT), Mask, VL});
6177returnconvertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6178 }
6179
6180SDValueFCLASS = DAG.getNode(RISCVISD::FCLASS,DL, XLenVT,Op.getOperand(0));
6181SDValueAND = DAG.getNode(ISD::AND,DL, XLenVT, FCLASS, TDCMaskV);
6182SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0,DL, XLenVT),
6183ISD::CondCode::SETNE);
6184return DAG.getNode(ISD::TRUNCATE,DL, VT, Res);
6185}
6186
6187// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6188// operations propagate NaNs.
6189staticSDValuelowerFMAXIMUM_FMINIMUM(SDValueOp,SelectionDAG &DAG,
6190constRISCVSubtarget &Subtarget) {
6191SDLocDL(Op);
6192MVT VT =Op.getSimpleValueType();
6193
6194SDValueX =Op.getOperand(0);
6195SDValueY =Op.getOperand(1);
6196
6197if (!VT.isVector()) {
6198MVT XLenVT = Subtarget.getXLenVT();
6199
6200// If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
6201// ensures that when one input is a NaN, the other will also be a NaN,
6202// allowing the NaN to propagate. If both inputs are NaN, this will swap the
6203// inputs, which is harmless.
6204
6205SDValue NewY =Y;
6206if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6207SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT,X,X,ISD::SETOEQ);
6208 NewY = DAG.getSelect(DL, VT, XIsNonNan,Y,X);
6209 }
6210
6211SDValue NewX =X;
6212if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6213SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT,Y,Y,ISD::SETOEQ);
6214 NewX = DAG.getSelect(DL, VT, YIsNonNan,X,Y);
6215 }
6216
6217unsigned Opc =
6218Op.getOpcode() ==ISD::FMAXIMUM ?RISCVISD::FMAX :RISCVISD::FMIN;
6219return DAG.getNode(Opc,DL, VT, NewX, NewY);
6220 }
6221
6222// Check for NaNs before converting fixed-length vectors to scalable ones.
6223bool XIsNeverNan =Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6224bool YIsNeverNan =Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6225
6226MVT ContainerVT = VT;
6227if (VT.isFixedLengthVector()) {
6228 ContainerVT =getContainerForFixedLengthVector(DAG, VT, Subtarget);
6229X =convertToScalableVector(ContainerVT,X, DAG, Subtarget);
6230Y =convertToScalableVector(ContainerVT,Y, DAG, Subtarget);
6231 }
6232
6233SDValue Mask, VL;
6234if (Op->isVPOpcode()) {
6235 Mask =Op.getOperand(2);
6236if (VT.isFixedLengthVector())
6237 Mask =convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6238 Subtarget);
6239 VL =Op.getOperand(3);
6240 }else {
6241 std::tie(Mask, VL) =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
6242 }
6243
6244SDValue NewY =Y;
6245if (!XIsNeverNan) {
6246SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL,DL, Mask.getValueType(),
6247 {X, X, DAG.getCondCode(ISD::SETOEQ),
6248 DAG.getUNDEF(ContainerVT), Mask, VL});
6249 NewY = DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT, XIsNonNan,Y,X,
6250 DAG.getUNDEF(ContainerVT), VL);
6251 }
6252
6253SDValue NewX =X;
6254if (!YIsNeverNan) {
6255SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL,DL, Mask.getValueType(),
6256 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6257 DAG.getUNDEF(ContainerVT), Mask, VL});
6258 NewX = DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT, YIsNonNan,X,Y,
6259 DAG.getUNDEF(ContainerVT), VL);
6260 }
6261
6262unsigned Opc =
6263Op.getOpcode() ==ISD::FMAXIMUM ||Op->getOpcode() == ISD::VP_FMAXIMUM
6264 ?RISCVISD::VFMAX_VL
6265 :RISCVISD::VFMIN_VL;
6266SDValue Res = DAG.getNode(Opc,DL, ContainerVT, NewX, NewY,
6267 DAG.getUNDEF(ContainerVT), Mask, VL);
6268if (VT.isFixedLengthVector())
6269 Res =convertFromScalableVector(VT, Res, DAG, Subtarget);
6270return Res;
6271}
6272
6273staticSDValuelowerFABSorFNEG(SDValueOp,SelectionDAG &DAG,
6274constRISCVSubtarget &Subtarget) {
6275bool IsFABS =Op.getOpcode() ==ISD::FABS;
6276assert((IsFABS ||Op.getOpcode() ==ISD::FNEG) &&
6277"Wrong opcode for lowering FABS or FNEG.");
6278
6279MVT XLenVT = Subtarget.getXLenVT();
6280MVT VT =Op.getSimpleValueType();
6281assert((VT == MVT::f16 || VT == MVT::bf16) &&"Unexpected type");
6282
6283SDLocDL(Op);
6284SDValue Fmv =
6285 DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT,Op.getOperand(0));
6286
6287APInt Mask = IsFABS ?APInt::getSignedMaxValue(16) :APInt::getSignMask(16);
6288 Mask = Mask.sext(Subtarget.getXLen());
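// Illustrative: for FABS the mask is 0x7fff, so the AND below clears the
// f16/bf16 sign bit; for FNEG it is 0x8000 (sign-extended), so the XOR flips
// the sign bit.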
6289
6290unsigned LogicOpc = IsFABS ?ISD::AND :ISD::XOR;
6291SDValue Logic =
6292 DAG.getNode(LogicOpc,DL, XLenVT, Fmv, DAG.getConstant(Mask,DL, XLenVT));
6293return DAG.getNode(RISCVISD::FMV_H_X,DL, VT, Logic);
6294}
6295
6296staticSDValuelowerFCOPYSIGN(SDValueOp,SelectionDAG &DAG,
6297constRISCVSubtarget &Subtarget) {
6298assert(Op.getOpcode() ==ISD::FCOPYSIGN &&"Unexpected opcode");
6299
6300MVT XLenVT = Subtarget.getXLenVT();
6301MVT VT =Op.getSimpleValueType();
6302assert((VT == MVT::f16 || VT == MVT::bf16) &&"Unexpected type");
6303
6304SDValue Mag =Op.getOperand(0);
6305SDValue Sign =Op.getOperand(1);
6306
6307SDLocDL(Op);
6308
6309// Get sign bit into an integer value.
6310SDValue SignAsInt;
6311unsigned SignSize = Sign.getValueSizeInBits();
6312if (SignSize == Subtarget.getXLen()) {
6313 SignAsInt = DAG.getNode(ISD::BITCAST,DL, XLenVT, Sign);
6314 }elseif (SignSize == 16) {
6315 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT, Sign);
6316 }elseif (SignSize == 32) {
6317 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64,DL, XLenVT, Sign);
6318 }elseif (SignSize == 64) {
6319assert(XLenVT == MVT::i32 &&"Unexpected type");
6320// Copy the upper word to integer.
6321 SignAsInt = DAG.getNode(RISCVISD::SplitF64,DL, {MVT::i32, MVT::i32}, Sign)
6322 .getValue(1);
6323 SignSize = 32;
6324 }else
6325llvm_unreachable("Unexpected sign size");
6326
6327// Get the signbit at the right position for MagAsInt.
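// Illustrative: copying the sign of an f32 value (SignSize == 32) onto an
// f16/bf16 magnitude (16 bits) gives ShiftAmount == 16, moving the f32 sign
// bit from bit 31 down to bit 15, the half-precision sign position.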
6328int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6329if (ShiftAmount > 0) {
6330 SignAsInt = DAG.getNode(ISD::SRL,DL, XLenVT, SignAsInt,
6331 DAG.getConstant(ShiftAmount,DL, XLenVT));
6332 }elseif (ShiftAmount < 0) {
6333 SignAsInt = DAG.getNode(ISD::SHL,DL, XLenVT, SignAsInt,
6334 DAG.getConstant(-ShiftAmount,DL, XLenVT));
6335 }
6336
6337// Mask the sign bit and any bits above it. The extra bits will be dropped
6338// when we convert back to FP.
6339SDValue SignMask = DAG.getConstant(
6340APInt::getSignMask(16).sext(Subtarget.getXLen()),DL, XLenVT);
6341SDValue SignBit = DAG.getNode(ISD::AND,DL, XLenVT, SignAsInt, SignMask);
6342
6343// Transform Mag value to integer, and clear the sign bit.
6344SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT, Mag);
6345SDValue ClearSignMask = DAG.getConstant(
6346APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()),DL, XLenVT);
6347SDValue ClearedSign =
6348 DAG.getNode(ISD::AND,DL, XLenVT, MagAsInt, ClearSignMask);
6349
6350SDValue CopiedSign = DAG.getNode(ISD::OR,DL, XLenVT, ClearedSign, SignBit,
6351SDNodeFlags::Disjoint);
6352
6353return DAG.getNode(RISCVISD::FMV_H_X,DL, VT, CopiedSign);
6354}
6355
6356/// Get a RISC-V target specified VL op for a given SDNode.
6357staticunsignedgetRISCVVLOp(SDValueOp) {
6358#define OP_CASE(NODE) \
6359 case ISD::NODE: \
6360 return RISCVISD::NODE##_VL;
6361#define VP_CASE(NODE) \
6362 case ISD::VP_##NODE: \
6363 return RISCVISD::NODE##_VL;
6364// clang-format off
6365switch (Op.getOpcode()) {
6366default:
6367llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6368OP_CASE(ADD)
6369OP_CASE(SUB)
6370OP_CASE(MUL)
6371OP_CASE(MULHS)
6372OP_CASE(MULHU)
6373OP_CASE(SDIV)
6374OP_CASE(SREM)
6375OP_CASE(UDIV)
6376OP_CASE(UREM)
6377OP_CASE(SHL)
6378OP_CASE(SRA)
6379OP_CASE(SRL)
6380OP_CASE(ROTL)
6381OP_CASE(ROTR)
6382OP_CASE(BSWAP)
6383OP_CASE(CTTZ)
6384OP_CASE(CTLZ)
6385OP_CASE(CTPOP)
6386OP_CASE(BITREVERSE)
6387OP_CASE(SADDSAT)
6388OP_CASE(UADDSAT)
6389OP_CASE(SSUBSAT)
6390OP_CASE(USUBSAT)
6391OP_CASE(AVGFLOORS)
6392OP_CASE(AVGFLOORU)
6393OP_CASE(AVGCEILS)
6394OP_CASE(AVGCEILU)
6395OP_CASE(FADD)
6396OP_CASE(FSUB)
6397OP_CASE(FMUL)
6398OP_CASE(FDIV)
6399OP_CASE(FNEG)
6400OP_CASE(FABS)
6401OP_CASE(FSQRT)
6402OP_CASE(SMIN)
6403OP_CASE(SMAX)
6404OP_CASE(UMIN)
6405OP_CASE(UMAX)
6406OP_CASE(STRICT_FADD)
6407OP_CASE(STRICT_FSUB)
6408OP_CASE(STRICT_FMUL)
6409OP_CASE(STRICT_FDIV)
6410OP_CASE(STRICT_FSQRT)
6411VP_CASE(ADD)// VP_ADD
6412VP_CASE(SUB)// VP_SUB
6413VP_CASE(MUL)// VP_MUL
6414VP_CASE(SDIV)// VP_SDIV
6415VP_CASE(SREM)// VP_SREM
6416VP_CASE(UDIV)// VP_UDIV
6417VP_CASE(UREM)// VP_UREM
6418VP_CASE(SHL)// VP_SHL
6419VP_CASE(FADD)// VP_FADD
6420VP_CASE(FSUB)// VP_FSUB
6421VP_CASE(FMUL)// VP_FMUL
6422VP_CASE(FDIV)// VP_FDIV
6423VP_CASE(FNEG)// VP_FNEG
6424VP_CASE(FABS)// VP_FABS
6425VP_CASE(SMIN)// VP_SMIN
6426VP_CASE(SMAX)// VP_SMAX
6427VP_CASE(UMIN)// VP_UMIN
6428VP_CASE(UMAX)// VP_UMAX
6429VP_CASE(FCOPYSIGN)// VP_FCOPYSIGN
6430VP_CASE(SETCC)// VP_SETCC
6431VP_CASE(SINT_TO_FP)// VP_SINT_TO_FP
6432VP_CASE(UINT_TO_FP)// VP_UINT_TO_FP
6433VP_CASE(BITREVERSE)// VP_BITREVERSE
6434VP_CASE(SADDSAT)// VP_SADDSAT
6435VP_CASE(UADDSAT)// VP_UADDSAT
6436VP_CASE(SSUBSAT)// VP_SSUBSAT
6437VP_CASE(USUBSAT)// VP_USUBSAT
6438VP_CASE(BSWAP)// VP_BSWAP
6439VP_CASE(CTLZ)// VP_CTLZ
6440VP_CASE(CTTZ)// VP_CTTZ
6441VP_CASE(CTPOP)// VP_CTPOP
6442caseISD::CTLZ_ZERO_UNDEF:
6443case ISD::VP_CTLZ_ZERO_UNDEF:
6444returnRISCVISD::CTLZ_VL;
6445caseISD::CTTZ_ZERO_UNDEF:
6446case ISD::VP_CTTZ_ZERO_UNDEF:
6447returnRISCVISD::CTTZ_VL;
6448caseISD::FMA:
6449case ISD::VP_FMA:
6450returnRISCVISD::VFMADD_VL;
6451caseISD::STRICT_FMA:
6452returnRISCVISD::STRICT_VFMADD_VL;
6453caseISD::AND:
6454case ISD::VP_AND:
6455if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6456returnRISCVISD::VMAND_VL;
6457returnRISCVISD::AND_VL;
6458caseISD::OR:
6459case ISD::VP_OR:
6460if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6461returnRISCVISD::VMOR_VL;
6462returnRISCVISD::OR_VL;
6463caseISD::XOR:
6464case ISD::VP_XOR:
6465if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6466returnRISCVISD::VMXOR_VL;
6467returnRISCVISD::XOR_VL;
6468case ISD::VP_SELECT:
6469case ISD::VP_MERGE:
6470returnRISCVISD::VMERGE_VL;
6471case ISD::VP_SRA:
6472returnRISCVISD::SRA_VL;
6473case ISD::VP_SRL:
6474returnRISCVISD::SRL_VL;
6475case ISD::VP_SQRT:
6476returnRISCVISD::FSQRT_VL;
6477case ISD::VP_SIGN_EXTEND:
6478returnRISCVISD::VSEXT_VL;
6479case ISD::VP_ZERO_EXTEND:
6480returnRISCVISD::VZEXT_VL;
6481case ISD::VP_FP_TO_SINT:
6482returnRISCVISD::VFCVT_RTZ_X_F_VL;
6483case ISD::VP_FP_TO_UINT:
6484returnRISCVISD::VFCVT_RTZ_XU_F_VL;
6485caseISD::FMINNUM:
6486case ISD::VP_FMINNUM:
6487returnRISCVISD::VFMIN_VL;
6488caseISD::FMAXNUM:
6489case ISD::VP_FMAXNUM:
6490returnRISCVISD::VFMAX_VL;
6491caseISD::LRINT:
6492case ISD::VP_LRINT:
6493caseISD::LLRINT:
6494case ISD::VP_LLRINT:
6495returnRISCVISD::VFCVT_RM_X_F_VL;
6496 }
6497// clang-format on
6498#undef OP_CASE
6499#undef VP_CASE
6500}
6501
6502/// Return true if a RISC-V target specified op has a passthru operand.
6503staticboolhasPassthruOp(unsigned Opcode) {
6504assert(Opcode >RISCVISD::FIRST_NUMBER &&
6505 Opcode <=RISCVISD::LAST_STRICTFP_OPCODE &&
6506"not a RISC-V target specific op");
6507static_assert(
6508RISCVISD::LAST_VL_VECTOR_OP -RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
6509RISCVISD::LAST_STRICTFP_OPCODE -RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
6510"adding target specific op should update this function");
6511if (Opcode >=RISCVISD::ADD_VL && Opcode <=RISCVISD::VFMAX_VL)
6512returntrue;
6513if (Opcode ==RISCVISD::FCOPYSIGN_VL)
6514returntrue;
6515if (Opcode >=RISCVISD::VWMUL_VL && Opcode <=RISCVISD::VFWSUB_W_VL)
6516returntrue;
6517if (Opcode ==RISCVISD::SETCC_VL)
6518returntrue;
6519if (Opcode >=RISCVISD::STRICT_FADD_VL && Opcode <=RISCVISD::STRICT_FDIV_VL)
6520returntrue;
6521if (Opcode ==RISCVISD::VMERGE_VL)
6522returntrue;
6523returnfalse;
6524}
6525
6526/// Return true if a RISC-V target specified op has a mask operand.
6527staticboolhasMaskOp(unsigned Opcode) {
6528assert(Opcode >RISCVISD::FIRST_NUMBER &&
6529 Opcode <=RISCVISD::LAST_STRICTFP_OPCODE &&
6530"not a RISC-V target specific op");
6531static_assert(
6532RISCVISD::LAST_VL_VECTOR_OP -RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
6533RISCVISD::LAST_STRICTFP_OPCODE -RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
6534"adding target specific op should update this function");
6535if (Opcode >=RISCVISD::TRUNCATE_VECTOR_VL && Opcode <=RISCVISD::SETCC_VL)
6536returntrue;
6537if (Opcode >=RISCVISD::VRGATHER_VX_VL && Opcode <=RISCVISD::VFIRST_VL)
6538returntrue;
6539if (Opcode >=RISCVISD::STRICT_FADD_VL &&
6540 Opcode <=RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
6541returntrue;
6542returnfalse;
6543}
6544
6545staticboolisPromotedOpNeedingSplit(SDValueOp,
6546constRISCVSubtarget &Subtarget) {
6547if (Op.getValueType() == MVT::nxv32f16 &&
6548 (Subtarget.hasVInstructionsF16Minimal() &&
6549 !Subtarget.hasVInstructionsF16()))
6550returntrue;
6551if (Op.getValueType() == MVT::nxv32bf16)
6552returntrue;
6553returnfalse;
6554}
6555
6556staticSDValueSplitVectorOp(SDValueOp,SelectionDAG &DAG) {
6557auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6558SDLocDL(Op);
6559
6560SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6561SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6562
6563for (unsigned j = 0; j !=Op.getNumOperands(); ++j) {
6564if (!Op.getOperand(j).getValueType().isVector()) {
6565 LoOperands[j] =Op.getOperand(j);
6566 HiOperands[j] =Op.getOperand(j);
6567continue;
6568 }
6569 std::tie(LoOperands[j], HiOperands[j]) =
6570 DAG.SplitVector(Op.getOperand(j),DL);
6571 }
6572
6573SDValue LoRes =
6574 DAG.getNode(Op.getOpcode(),DL, LoVT, LoOperands,Op->getFlags());
6575SDValue HiRes =
6576 DAG.getNode(Op.getOpcode(),DL, HiVT, HiOperands,Op->getFlags());
6577
6578return DAG.getNode(ISD::CONCAT_VECTORS,DL,Op.getValueType(), LoRes, HiRes);
6579}
6580
6581staticSDValueSplitVPOp(SDValueOp,SelectionDAG &DAG) {
6582assert(ISD::isVPOpcode(Op.getOpcode()) &&"Not a VP op");
6583auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6584SDLocDL(Op);
6585
6586SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6587SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6588
6589for (unsigned j = 0; j !=Op.getNumOperands(); ++j) {
6590if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6591 std::tie(LoOperands[j], HiOperands[j]) =
6592 DAG.SplitEVL(Op.getOperand(j),Op.getValueType(),DL);
6593continue;
6594 }
6595if (!Op.getOperand(j).getValueType().isVector()) {
6596 LoOperands[j] =Op.getOperand(j);
6597 HiOperands[j] =Op.getOperand(j);
6598continue;
6599 }
6600 std::tie(LoOperands[j], HiOperands[j]) =
6601 DAG.SplitVector(Op.getOperand(j),DL);
6602 }
6603
6604SDValue LoRes =
6605 DAG.getNode(Op.getOpcode(),DL, LoVT, LoOperands,Op->getFlags());
6606SDValue HiRes =
6607 DAG.getNode(Op.getOpcode(),DL, HiVT, HiOperands,Op->getFlags());
6608
6609return DAG.getNode(ISD::CONCAT_VECTORS,DL,Op.getValueType(), LoRes, HiRes);
6610}
6611
6612staticSDValueSplitVectorReductionOp(SDValueOp,SelectionDAG &DAG) {
6613SDLocDL(Op);
6614
6615auto [Lo,Hi] = DAG.SplitVector(Op.getOperand(1),DL);
6616auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2),DL);
6617auto [EVLLo, EVLHi] =
6618 DAG.SplitEVL(Op.getOperand(3),Op.getOperand(1).getValueType(),DL);
6619
6620SDValue ResLo =
6621 DAG.getNode(Op.getOpcode(),DL,Op.getValueType(),
6622 {Op.getOperand(0), Lo, MaskLo, EVLLo},Op->getFlags());
6623return DAG.getNode(Op.getOpcode(),DL,Op.getValueType(),
6624 {ResLo, Hi, MaskHi, EVLHi},Op->getFlags());
6625}
6626
6627staticSDValueSplitStrictFPVectorOp(SDValueOp,SelectionDAG &DAG) {
6628
6629assert(Op->isStrictFPOpcode());
6630
6631auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6632
6633SDVTList LoVTs = DAG.getVTList(LoVT,Op->getValueType(1));
6634SDVTList HiVTs = DAG.getVTList(HiVT,Op->getValueType(1));
6635
6636SDLocDL(Op);
6637
6638SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6639SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6640
6641for (unsigned j = 0; j !=Op.getNumOperands(); ++j) {
6642if (!Op.getOperand(j).getValueType().isVector()) {
6643 LoOperands[j] =Op.getOperand(j);
6644 HiOperands[j] =Op.getOperand(j);
6645continue;
6646 }
6647 std::tie(LoOperands[j], HiOperands[j]) =
6648 DAG.SplitVector(Op.getOperand(j),DL);
6649 }
6650
6651SDValue LoRes =
6652 DAG.getNode(Op.getOpcode(),DL, LoVTs, LoOperands,Op->getFlags());
6653 HiOperands[0] = LoRes.getValue(1);
6654SDValue HiRes =
6655 DAG.getNode(Op.getOpcode(),DL, HiVTs, HiOperands,Op->getFlags());
6656
6657SDValue V = DAG.getNode(ISD::CONCAT_VECTORS,DL,Op->getValueType(0),
6658 LoRes.getValue(0), HiRes.getValue(0));
6659return DAG.getMergeValues({V, HiRes.getValue(1)},DL);
6660}
6661
6662SDValueRISCVTargetLowering::LowerOperation(SDValueOp,
6663SelectionDAG &DAG) const{
6664switch (Op.getOpcode()) {
6665default:
6666report_fatal_error("unimplemented operand");
6667caseISD::ATOMIC_FENCE:
6668returnLowerATOMIC_FENCE(Op, DAG, Subtarget);
6669caseISD::GlobalAddress:
6670return lowerGlobalAddress(Op, DAG);
6671caseISD::BlockAddress:
6672return lowerBlockAddress(Op, DAG);
6673caseISD::ConstantPool:
6674return lowerConstantPool(Op, DAG);
6675caseISD::JumpTable:
6676return lowerJumpTable(Op, DAG);
6677caseISD::GlobalTLSAddress:
6678return lowerGlobalTLSAddress(Op, DAG);
6679caseISD::Constant:
6680returnlowerConstant(Op, DAG, Subtarget);
6681caseISD::ConstantFP:
6682return lowerConstantFP(Op, DAG);
6683caseISD::SELECT:
6684return lowerSELECT(Op, DAG);
6685caseISD::BRCOND:
6686return lowerBRCOND(Op, DAG);
6687caseISD::VASTART:
6688return lowerVASTART(Op, DAG);
6689caseISD::FRAMEADDR:
6690return lowerFRAMEADDR(Op, DAG);
6691caseISD::RETURNADDR:
6692return lowerRETURNADDR(Op, DAG);
6693caseISD::SHL_PARTS:
6694return lowerShiftLeftParts(Op, DAG);
6695caseISD::SRA_PARTS:
6696return lowerShiftRightParts(Op, DAG,true);
6697caseISD::SRL_PARTS:
6698return lowerShiftRightParts(Op, DAG,false);
6699caseISD::ROTL:
6700caseISD::ROTR:
6701if (Op.getValueType().isFixedLengthVector()) {
6702assert(Subtarget.hasStdExtZvkb());
6703return lowerToScalableOp(Op, DAG);
6704 }
6705assert(Subtarget.hasVendorXTHeadBb() &&
6706 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6707"Unexpected custom legalization");
6708// XTHeadBb only supports rotate by constant.
6709if (!isa<ConstantSDNode>(Op.getOperand(1)))
6710returnSDValue();
6711returnOp;
6712caseISD::BITCAST: {
6713SDLocDL(Op);
6714EVT VT =Op.getValueType();
6715SDValue Op0 =Op.getOperand(0);
6716EVT Op0VT = Op0.getValueType();
6717MVT XLenVT = Subtarget.getXLenVT();
6718if (Op0VT == MVT::i16 &&
6719 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6720 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6721SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT, Op0);
6722return DAG.getNode(RISCVISD::FMV_H_X,DL, VT, NewOp0);
6723 }
6724if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6725 Subtarget.hasStdExtFOrZfinx()) {
6726SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64, Op0);
6727return DAG.getNode(RISCVISD::FMV_W_X_RV64,DL, MVT::f32, NewOp0);
6728 }
6729if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6730 Subtarget.hasStdExtDOrZdinx()) {
6731SDValueLo,Hi;
6732 std::tie(Lo,Hi) = DAG.SplitScalar(Op0,DL, MVT::i32, MVT::i32);
6733return DAG.getNode(RISCVISD::BuildPairF64,DL, MVT::f64,Lo,Hi);
6734 }
6735
6736// Consider other scalar<->scalar casts as legal if the types are legal.
6737// Otherwise expand them.
6738if (!VT.isVector() && !Op0VT.isVector()) {
6739if (isTypeLegal(VT) &&isTypeLegal(Op0VT))
6740returnOp;
6741returnSDValue();
6742 }
6743
6744assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6745"Unexpected types");
6746
6747if (VT.isFixedLengthVector()) {
6748// We can handle fixed length vector bitcasts with a simple replacement
6749// in isel.
6750if (Op0VT.isFixedLengthVector())
6751returnOp;
6752// When bitcasting from scalar to fixed-length vector, insert the scalar
6753// into a one-element vector of the result type, and perform a vector
6754// bitcast.
6755if (!Op0VT.isVector()) {
6756EVT BVT =EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6757if (!isTypeLegal(BVT))
6758returnSDValue();
6759return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, BVT,
6760 DAG.getUNDEF(BVT), Op0,
6761 DAG.getVectorIdxConstant(0,DL)));
6762 }
6763returnSDValue();
6764 }
6765// Custom-legalize bitcasts from fixed-length vector types to scalar types
6766// as follows: bitcast the vector to a one-element vector type whose element
6767// type is the same as the result type, and extract the first element.
6768if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6769EVT BVT =EVT::getVectorVT(*DAG.getContext(), VT, 1);
6770if (!isTypeLegal(BVT))
6771returnSDValue();
6772SDValue BVec = DAG.getBitcast(BVT, Op0);
6773return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, VT, BVec,
6774 DAG.getVectorIdxConstant(0,DL));
6775 }
6776returnSDValue();
6777 }
6778caseISD::INTRINSIC_WO_CHAIN:
6779return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6780caseISD::INTRINSIC_W_CHAIN:
6781return LowerINTRINSIC_W_CHAIN(Op, DAG);
6782caseISD::INTRINSIC_VOID:
6783return LowerINTRINSIC_VOID(Op, DAG);
6784caseISD::IS_FPCLASS:
6785return LowerIS_FPCLASS(Op, DAG);
6786caseISD::BITREVERSE: {
6787MVT VT =Op.getSimpleValueType();
6788if (VT.isFixedLengthVector()) {
6789assert(Subtarget.hasStdExtZvbb());
6790return lowerToScalableOp(Op, DAG);
6791 }
6792SDLocDL(Op);
6793assert(Subtarget.hasStdExtZbkb() &&"Unexpected custom legalization");
6794assert(Op.getOpcode() ==ISD::BITREVERSE &&"Unexpected opcode");
6795// Expand bitreverse to a bswap(rev8) followed by brev8.
6796SDValue BSwap = DAG.getNode(ISD::BSWAP,DL, VT,Op.getOperand(0));
6797return DAG.getNode(RISCVISD::BREV8,DL, VT, BSwap);
6798 }
6799caseISD::TRUNCATE:
6800caseISD::TRUNCATE_SSAT_S:
6801caseISD::TRUNCATE_USAT_U:
6802// Only custom-lower vector truncates
6803if (!Op.getSimpleValueType().isVector())
6804returnOp;
6805return lowerVectorTruncLike(Op, DAG);
6806caseISD::ANY_EXTEND:
6807caseISD::ZERO_EXTEND:
6808if (Op.getOperand(0).getValueType().isVector() &&
6809Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6810return lowerVectorMaskExt(Op, DAG,/*ExtVal*/ 1);
6811return lowerFixedLengthVectorExtendToRVV(Op, DAG,RISCVISD::VZEXT_VL);
6812caseISD::SIGN_EXTEND:
6813if (Op.getOperand(0).getValueType().isVector() &&
6814Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6815return lowerVectorMaskExt(Op, DAG,/*ExtVal*/ -1);
6816return lowerFixedLengthVectorExtendToRVV(Op, DAG,RISCVISD::VSEXT_VL);
6817caseISD::SPLAT_VECTOR_PARTS:
6818return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6819caseISD::INSERT_VECTOR_ELT:
6820return lowerINSERT_VECTOR_ELT(Op, DAG);
6821caseISD::EXTRACT_VECTOR_ELT:
6822return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6823caseISD::SCALAR_TO_VECTOR: {
6824MVT VT =Op.getSimpleValueType();
6825SDLocDL(Op);
6826SDValue Scalar =Op.getOperand(0);
6827if (VT.getVectorElementType() == MVT::i1) {
6828MVT WideVT = VT.changeVectorElementType(MVT::i8);
6829SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR,DL, WideVT, Scalar);
6830return DAG.getNode(ISD::TRUNCATE,DL, VT, V);
6831 }
6832MVT ContainerVT = VT;
6833if (VT.isFixedLengthVector())
6834 ContainerVT =getContainerForFixedLengthVector(VT);
6835SDValue VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
6836
6837SDValue V;
6838if (VT.isFloatingPoint()) {
6839 V = DAG.getNode(RISCVISD::VFMV_S_F_VL,DL, ContainerVT,
6840 DAG.getUNDEF(ContainerVT), Scalar, VL);
6841 }else {
6842 Scalar = DAG.getNode(ISD::ANY_EXTEND,DL, Subtarget.getXLenVT(), Scalar);
6843 V = DAG.getNode(RISCVISD::VMV_S_X_VL,DL, ContainerVT,
6844 DAG.getUNDEF(ContainerVT), Scalar, VL);
6845 }
6846if (VT.isFixedLengthVector())
6847 V =convertFromScalableVector(VT, V, DAG, Subtarget);
6848return V;
6849 }
6850caseISD::VSCALE: {
6851MVT XLenVT = Subtarget.getXLenVT();
6852MVT VT =Op.getSimpleValueType();
6853SDLocDL(Op);
6854SDValue Res = DAG.getNode(RISCVISD::READ_VLENB,DL, XLenVT);
6855// We define our scalable vector types for lmul=1 to use a 64-bit known
6856// minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6857// vscale as VLENB / 8.
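// E.g. (illustrative): VLEN == 128 gives VLENB == 16 and vscale == 2, so a
// <vscale x 2 x i32> vector holds 4 i32 elements.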
6858static_assert(RISCV::RVVBitsPerBlock == 64,"Unexpected bits per block!");
6859if (Subtarget.getRealMinVLen() <RISCV::RVVBitsPerBlock)
6860report_fatal_error("Support for VLEN==32 is incomplete.");
6861// We assume VLENB is a multiple of 8. We manually choose the best shift
6862// here because SimplifyDemandedBits isn't always able to simplify it.
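// Illustrative: vscale x 4 (Val == 4) becomes a single SRL of VLENB by 1,
// vscale x 16 an SHL by 1, vscale x 24 a MUL by 3, and other multipliers fall
// back to an SRL by 3 followed by a MUL by Val.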
6863uint64_t Val =Op.getConstantOperandVal(0);
6864if (isPowerOf2_64(Val)) {
6865uint64_tLog2 =Log2_64(Val);
6866if (Log2 < 3)
6867 Res = DAG.getNode(ISD::SRL,DL, XLenVT, Res,
6868 DAG.getConstant(3 -Log2,DL, VT));
6869elseif (Log2 > 3)
6870 Res = DAG.getNode(ISD::SHL,DL, XLenVT, Res,
6871 DAG.getConstant(Log2 - 3,DL, XLenVT));
6872 }elseif ((Val % 8) == 0) {
6873// If the multiplier is a multiple of 8, scale it down to avoid needing
6874// to shift the VLENB value.
6875 Res = DAG.getNode(ISD::MUL,DL, XLenVT, Res,
6876 DAG.getConstant(Val / 8,DL, XLenVT));
6877 }else {
6878SDValue VScale = DAG.getNode(ISD::SRL,DL, XLenVT, Res,
6879 DAG.getConstant(3,DL, XLenVT));
6880 Res = DAG.getNode(ISD::MUL,DL, XLenVT, VScale,
6881 DAG.getConstant(Val,DL, XLenVT));
6882 }
6883return DAG.getNode(ISD::TRUNCATE,DL, VT, Res);
6884 }
6885caseISD::FPOWI: {
6886// Custom promote f16 powi with illegal i32 integer type on RV64. Once
6887// promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6888if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6889Op.getOperand(1).getValueType() == MVT::i32) {
6890SDLocDL(Op);
6891SDValue Op0 = DAG.getNode(ISD::FP_EXTEND,DL, MVT::f32,Op.getOperand(0));
6892SDValue Powi =
6893 DAG.getNode(ISD::FPOWI,DL, MVT::f32, Op0,Op.getOperand(1));
6894return DAG.getNode(ISD::FP_ROUND,DL, MVT::f16, Powi,
6895 DAG.getIntPtrConstant(0,DL,/*isTarget=*/true));
6896 }
6897returnSDValue();
6898 }
6899caseISD::FMAXIMUM:
6900caseISD::FMINIMUM:
6901if (isPromotedOpNeedingSplit(Op, Subtarget))
6902returnSplitVectorOp(Op, DAG);
6903returnlowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6904caseISD::FP_EXTEND:
6905caseISD::FP_ROUND:
6906return lowerVectorFPExtendOrRoundLike(Op, DAG);
6907caseISD::STRICT_FP_ROUND:
6908caseISD::STRICT_FP_EXTEND:
6909return lowerStrictFPExtendOrRoundLike(Op, DAG);
6910caseISD::SINT_TO_FP:
6911caseISD::UINT_TO_FP:
6912if (Op.getValueType().isVector() &&
6913 ((Op.getValueType().getScalarType() == MVT::f16 &&
6914 (Subtarget.hasVInstructionsF16Minimal() &&
6915 !Subtarget.hasVInstructionsF16())) ||
6916Op.getValueType().getScalarType() == MVT::bf16)) {
6917if (isPromotedOpNeedingSplit(Op, Subtarget))
6918returnSplitVectorOp(Op, DAG);
6919// int -> f32
6920SDLocDL(Op);
6921MVT NVT =
6922MVT::getVectorVT(MVT::f32,Op.getValueType().getVectorElementCount());
6923SDValueNC = DAG.getNode(Op.getOpcode(),DL, NVT,Op->ops());
6924// f32 -> [b]f16
6925return DAG.getNode(ISD::FP_ROUND,DL,Op.getValueType(),NC,
6926 DAG.getIntPtrConstant(0,DL,/*isTarget=*/true));
6927 }
6928 [[fallthrough]];
6929caseISD::FP_TO_SINT:
6930caseISD::FP_TO_UINT:
6931if (SDValue Op1 =Op.getOperand(0);
6932 Op1.getValueType().isVector() &&
6933 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6934 (Subtarget.hasVInstructionsF16Minimal() &&
6935 !Subtarget.hasVInstructionsF16())) ||
6936 Op1.getValueType().getScalarType() == MVT::bf16)) {
6937if (isPromotedOpNeedingSplit(Op1, Subtarget))
6938returnSplitVectorOp(Op, DAG);
6939// [b]f16 -> f32
6940SDLocDL(Op);
6941MVT NVT =MVT::getVectorVT(MVT::f32,
6942 Op1.getValueType().getVectorElementCount());
6943SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND,DL, NVT, Op1);
6944// f32 -> int
6945return DAG.getNode(Op.getOpcode(),DL,Op.getValueType(), WidenVec);
6946 }
6947 [[fallthrough]];
6948caseISD::STRICT_FP_TO_SINT:
6949caseISD::STRICT_FP_TO_UINT:
6950caseISD::STRICT_SINT_TO_FP:
6951caseISD::STRICT_UINT_TO_FP: {
6952// RVV can only do fp<->int conversions to types half or double the size of
6953// the source. We custom-lower any conversion that would need two hops into a
6954// sequence of single-hop conversions.
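// Illustrative examples (assumed types): i8 -> f32 is lowered as an i8 -> i16
// sign/zero extend followed by a single widening i16 -> f32 convert, and
// f16 -> i64 as an f16 -> f32 fp_extend followed by an f32 -> i64 convert.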
6955MVT VT =Op.getSimpleValueType();
6956if (VT.isScalarInteger())
6957returnlowerFP_TO_INT(Op, DAG, Subtarget);
6958bool IsStrict =Op->isStrictFPOpcode();
6959SDValue Src =Op.getOperand(0 + IsStrict);
6960MVT SrcVT = Src.getSimpleValueType();
6961if (SrcVT.isScalarInteger())
6962returnlowerINT_TO_FP(Op, DAG, Subtarget);
6963if (!VT.isVector())
6964returnOp;
6965SDLocDL(Op);
6966MVT EltVT = VT.getVectorElementType();
6967MVT SrcEltVT = SrcVT.getVectorElementType();
6968unsigned EltSize = EltVT.getSizeInBits();
6969unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6970assert(isPowerOf2_32(EltSize) &&isPowerOf2_32(SrcEltSize) &&
6971"Unexpected vector element types");
6972
6973bool IsInt2FP = SrcEltVT.isInteger();
6974// Widening conversions
6975if (EltSize > (2 * SrcEltSize)) {
6976if (IsInt2FP) {
6977// Do a regular integer sign/zero extension then convert to float.
6978MVT IVecVT =MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6979 VT.getVectorElementCount());
6980unsigned ExtOpcode = (Op.getOpcode() ==ISD::UINT_TO_FP ||
6981Op.getOpcode() ==ISD::STRICT_UINT_TO_FP)
6982 ?ISD::ZERO_EXTEND
6983 :ISD::SIGN_EXTEND;
6984SDValue Ext = DAG.getNode(ExtOpcode,DL, IVecVT, Src);
6985if (IsStrict)
6986return DAG.getNode(Op.getOpcode(),DL,Op->getVTList(),
6987Op.getOperand(0), Ext);
6988return DAG.getNode(Op.getOpcode(),DL, VT, Ext);
6989 }
6990// FP2Int
6991assert(SrcEltVT == MVT::f16 &&"Unexpected FP_TO_[US]INT lowering");
6992// Do one doubling fp_extend then complete the operation by converting
6993// to int.
6994MVT InterimFVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6995if (IsStrict) {
6996auto [FExt, Chain] =
6997 DAG.getStrictFPExtendOrRound(Src,Op.getOperand(0),DL, InterimFVT);
6998return DAG.getNode(Op.getOpcode(),DL,Op->getVTList(), Chain, FExt);
6999 }
7000SDValue FExt = DAG.getFPExtendOrRound(Src,DL, InterimFVT);
7001return DAG.getNode(Op.getOpcode(),DL, VT, FExt);
7002 }
7003
7004// Narrowing conversions
7005if (SrcEltSize > (2 * EltSize)) {
7006if (IsInt2FP) {
7007// One narrowing int_to_fp, then an fp_round.
7008assert(EltVT == MVT::f16 &&"Unexpected [US]_TO_FP lowering");
7009MVT InterimFVT =MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7010if (IsStrict) {
7011SDValue Int2FP = DAG.getNode(Op.getOpcode(),DL,
7012 DAG.getVTList(InterimFVT, MVT::Other),
7013Op.getOperand(0), Src);
7014SDValue Chain = Int2FP.getValue(1);
7015return DAG.getStrictFPExtendOrRound(Int2FP, Chain,DL, VT).first;
7016 }
7017SDValue Int2FP = DAG.getNode(Op.getOpcode(),DL, InterimFVT, Src);
7018return DAG.getFPExtendOrRound(Int2FP,DL, VT);
7019 }
7020// FP2Int
7021// One narrowing fp_to_int, then truncate the integer. If the float isn't
7022// representable by the integer, the result is poison.
7023MVT IVecVT =MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7024 VT.getVectorElementCount());
7025if (IsStrict) {
7026SDValue FP2Int =
7027 DAG.getNode(Op.getOpcode(),DL, DAG.getVTList(IVecVT, MVT::Other),
7028Op.getOperand(0), Src);
7029SDValue Res = DAG.getNode(ISD::TRUNCATE,DL, VT, FP2Int);
7030return DAG.getMergeValues({Res, FP2Int.getValue(1)},DL);
7031 }
7032SDValue FP2Int = DAG.getNode(Op.getOpcode(),DL, IVecVT, Src);
7033return DAG.getNode(ISD::TRUNCATE,DL, VT, FP2Int);
7034 }
7035
7036// Scalable vectors can exit here; isel patterns will handle equally-sized
7037// conversions as well as the halving/doubling ones.
7038if (!VT.isFixedLengthVector())
7039returnOp;
7040
7041// For fixed-length vectors we lower to a custom "VL" node.
7042unsigned RVVOpc = 0;
7043switch (Op.getOpcode()) {
7044default:
7045llvm_unreachable("Impossible opcode");
7046caseISD::FP_TO_SINT:
7047 RVVOpc =RISCVISD::VFCVT_RTZ_X_F_VL;
7048break;
7049caseISD::FP_TO_UINT:
7050 RVVOpc =RISCVISD::VFCVT_RTZ_XU_F_VL;
7051break;
7052caseISD::SINT_TO_FP:
7053 RVVOpc =RISCVISD::SINT_TO_FP_VL;
7054break;
7055caseISD::UINT_TO_FP:
7056 RVVOpc =RISCVISD::UINT_TO_FP_VL;
7057break;
7058caseISD::STRICT_FP_TO_SINT:
7059 RVVOpc =RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7060break;
7061caseISD::STRICT_FP_TO_UINT:
7062 RVVOpc =RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7063break;
7064caseISD::STRICT_SINT_TO_FP:
7065 RVVOpc =RISCVISD::STRICT_SINT_TO_FP_VL;
7066break;
7067caseISD::STRICT_UINT_TO_FP:
7068 RVVOpc =RISCVISD::STRICT_UINT_TO_FP_VL;
7069break;
7070 }
7071
7072MVT ContainerVT =getContainerForFixedLengthVector(VT);
7073MVT SrcContainerVT =getContainerForFixedLengthVector(SrcVT);
7074assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7075"Expected same element count");
7076
7077auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
7078
7079 Src =convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7080if (IsStrict) {
7081 Src = DAG.getNode(RVVOpc,DL, DAG.getVTList(ContainerVT, MVT::Other),
7082Op.getOperand(0), Src, Mask, VL);
7083SDValue SubVec =convertFromScalableVector(VT, Src, DAG, Subtarget);
7084return DAG.getMergeValues({SubVec, Src.getValue(1)},DL);
7085 }
7086 Src = DAG.getNode(RVVOpc,DL, ContainerVT, Src, Mask, VL);
7087returnconvertFromScalableVector(VT, Src, DAG, Subtarget);
7088 }
7089caseISD::FP_TO_SINT_SAT:
7090caseISD::FP_TO_UINT_SAT:
7091returnlowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7092caseISD::FP_TO_BF16: {
7093// Custom lower to ensure the libcall return is passed in an FPR on hard
7094// float ABIs.
7095assert(!Subtarget.isSoftFPABI() &&"Unexpected custom legalization");
7096SDLocDL(Op);
7097MakeLibCallOptions CallOptions;
7098RTLIB::Libcall LC =
7099RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7100SDValue Res =
7101makeLibCall(DAG, LC, MVT::f32,Op.getOperand(0), CallOptions,DL).first;
7102if (Subtarget.is64Bit())
7103return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64,DL, MVT::i64, Res);
7104return DAG.getBitcast(MVT::i32, Res);
7105 }
7106caseISD::BF16_TO_FP: {
7107assert(Subtarget.hasStdExtFOrZfinx() &&"Unexpected custom legalization");
7108MVT VT =Op.getSimpleValueType();
7109SDLocDL(Op);
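// Note (illustrative, not from the original comments): a bf16 value carries
// the sign, exponent and top mantissa bits of the corresponding f32, so
// shifting the 16 raw bits left by 16 and reinterpreting them as f32 performs
// the extension exactly.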
7110Op = DAG.getNode(
7111ISD::SHL,DL,Op.getOperand(0).getValueType(),Op.getOperand(0),
7112 DAG.getShiftAmountConstant(16,Op.getOperand(0).getValueType(),DL));
7113SDValue Res = Subtarget.is64Bit()
7114 ? DAG.getNode(RISCVISD::FMV_W_X_RV64,DL, MVT::f32,Op)
7115 : DAG.getBitcast(MVT::f32,Op);
7116// fp_extend if the target VT is bigger than f32.
7117if (VT != MVT::f32)
7118return DAG.getNode(ISD::FP_EXTEND,DL, VT, Res);
7119return Res;
7120 }
7121caseISD::STRICT_FP_TO_FP16:
7122caseISD::FP_TO_FP16: {
7123// Custom lower to ensure the libcall return is passed in an FPR on hard
7124// float ABIs.
7125assert(Subtarget.hasStdExtFOrZfinx() &&"Unexpected custom legalisation");
7126SDLocDL(Op);
7127MakeLibCallOptions CallOptions;
7128bool IsStrict =Op->isStrictFPOpcode();
7129SDValue Op0 = IsStrict ?Op.getOperand(1) :Op.getOperand(0);
7130SDValue Chain = IsStrict ?Op.getOperand(0) :SDValue();
7131RTLIB::Libcall LC =RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7132SDValue Res;
7133 std::tie(Res, Chain) =
7134makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions,DL, Chain);
7135if (Subtarget.is64Bit())
7136return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64,DL, MVT::i64, Res);
7137SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7138if (IsStrict)
7139return DAG.getMergeValues({Result, Chain},DL);
7140return Result;
7141 }
7142caseISD::STRICT_FP16_TO_FP:
7143caseISD::FP16_TO_FP: {
7144// Custom lower to ensure the libcall argument is passed in an FPR on hard
7145// float ABIs.
7146assert(Subtarget.hasStdExtFOrZfinx() &&"Unexpected custom legalisation");
7147SDLocDL(Op);
7148MakeLibCallOptions CallOptions;
7149bool IsStrict =Op->isStrictFPOpcode();
7150SDValue Op0 = IsStrict ?Op.getOperand(1) :Op.getOperand(0);
7151SDValue Chain = IsStrict ?Op.getOperand(0) :SDValue();
7152SDValue Arg = Subtarget.is64Bit()
7153 ? DAG.getNode(RISCVISD::FMV_W_X_RV64,DL, MVT::f32, Op0)
7154 : DAG.getBitcast(MVT::f32, Op0);
7155SDValue Res;
7156 std::tie(Res, Chain) =makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7157 CallOptions,DL, Chain);
7158if (IsStrict)
7159return DAG.getMergeValues({Res, Chain},DL);
7160return Res;
7161 }
7162caseISD::FTRUNC:
7163caseISD::FCEIL:
7164caseISD::FFLOOR:
7165caseISD::FNEARBYINT:
7166caseISD::FRINT:
7167caseISD::FROUND:
7168caseISD::FROUNDEVEN:
7169if (isPromotedOpNeedingSplit(Op, Subtarget))
7170returnSplitVectorOp(Op, DAG);
7171returnlowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7172caseISD::LRINT:
7173caseISD::LLRINT:
7174if (Op.getValueType().isVector())
7175returnlowerVectorXRINT(Op, DAG, Subtarget);
7176 [[fallthrough]];
7177caseISD::LROUND:
7178caseISD::LLROUND: {
7179assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7180"Unexpected custom legalisation");
7181SDLocDL(Op);
7182SDValue Ext = DAG.getNode(ISD::FP_EXTEND,DL, MVT::f32,Op.getOperand(0));
7183return DAG.getNode(Op.getOpcode(),DL,Op.getValueType(), Ext);
7184 }
7185caseISD::STRICT_LRINT:
7186caseISD::STRICT_LLRINT:
7187caseISD::STRICT_LROUND:
7188caseISD::STRICT_LLROUND: {
7189assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7190"Unexpected custom legalisation");
7191SDLocDL(Op);
7192SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND,DL, {MVT::f32, MVT::Other},
7193 {Op.getOperand(0),Op.getOperand(1)});
7194return DAG.getNode(Op.getOpcode(),DL, {Op.getValueType(), MVT::Other},
7195 {Ext.getValue(1), Ext.getValue(0)});
7196 }
7197caseISD::VECREDUCE_ADD:
7198caseISD::VECREDUCE_UMAX:
7199caseISD::VECREDUCE_SMAX:
7200caseISD::VECREDUCE_UMIN:
7201caseISD::VECREDUCE_SMIN:
7202return lowerVECREDUCE(Op, DAG);
7203caseISD::VECREDUCE_AND:
7204caseISD::VECREDUCE_OR:
7205caseISD::VECREDUCE_XOR:
7206if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7207return lowerVectorMaskVecReduction(Op, DAG,/*IsVP*/false);
7208return lowerVECREDUCE(Op, DAG);
7209caseISD::VECREDUCE_FADD:
7210caseISD::VECREDUCE_SEQ_FADD:
7211caseISD::VECREDUCE_FMIN:
7212caseISD::VECREDUCE_FMAX:
7213caseISD::VECREDUCE_FMAXIMUM:
7214caseISD::VECREDUCE_FMINIMUM:
7215return lowerFPVECREDUCE(Op, DAG);
7216case ISD::VP_REDUCE_ADD:
7217case ISD::VP_REDUCE_UMAX:
7218case ISD::VP_REDUCE_SMAX:
7219case ISD::VP_REDUCE_UMIN:
7220case ISD::VP_REDUCE_SMIN:
7221case ISD::VP_REDUCE_FADD:
7222case ISD::VP_REDUCE_SEQ_FADD:
7223case ISD::VP_REDUCE_FMIN:
7224case ISD::VP_REDUCE_FMAX:
7225case ISD::VP_REDUCE_FMINIMUM:
7226case ISD::VP_REDUCE_FMAXIMUM:
7227if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7228returnSplitVectorReductionOp(Op, DAG);
7229return lowerVPREDUCE(Op, DAG);
7230case ISD::VP_REDUCE_AND:
7231case ISD::VP_REDUCE_OR:
7232case ISD::VP_REDUCE_XOR:
7233if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7234return lowerVectorMaskVecReduction(Op, DAG,/*IsVP*/true);
7235return lowerVPREDUCE(Op, DAG);
7236case ISD::VP_CTTZ_ELTS:
7237case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7238return lowerVPCttzElements(Op, DAG);
7239caseISD::UNDEF: {
7240MVT ContainerVT =getContainerForFixedLengthVector(Op.getSimpleValueType());
7241returnconvertFromScalableVector(Op.getSimpleValueType(),
7242 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7243 }
7244caseISD::INSERT_SUBVECTOR:
7245return lowerINSERT_SUBVECTOR(Op, DAG);
7246caseISD::EXTRACT_SUBVECTOR:
7247return lowerEXTRACT_SUBVECTOR(Op, DAG);
7248caseISD::VECTOR_DEINTERLEAVE:
7249return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7250caseISD::VECTOR_INTERLEAVE:
7251return lowerVECTOR_INTERLEAVE(Op, DAG);
7252caseISD::STEP_VECTOR:
7253return lowerSTEP_VECTOR(Op, DAG);
7254caseISD::VECTOR_REVERSE:
7255return lowerVECTOR_REVERSE(Op, DAG);
7256caseISD::VECTOR_SPLICE:
7257return lowerVECTOR_SPLICE(Op, DAG);
7258caseISD::BUILD_VECTOR:
7259returnlowerBUILD_VECTOR(Op, DAG, Subtarget);
7260caseISD::SPLAT_VECTOR: {
7261MVT VT =Op.getSimpleValueType();
7262MVT EltVT = VT.getVectorElementType();
7263if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7264 EltVT == MVT::bf16) {
7265SDLocDL(Op);
7266SDValue Elt;
7267if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7268 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7269 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, Subtarget.getXLenVT(),
7270Op.getOperand(0));
7271else
7272 Elt = DAG.getNode(ISD::BITCAST,DL, MVT::i16,Op.getOperand(0));
7273MVT IVT = VT.changeVectorElementType(MVT::i16);
7274return DAG.getNode(ISD::BITCAST,DL, VT,
7275 DAG.getNode(ISD::SPLAT_VECTOR,DL, IVT, Elt));
7276 }
7277
7278if (EltVT == MVT::i1)
7279return lowerVectorMaskSplat(Op, DAG);
7280returnSDValue();
7281 }
7282caseISD::VECTOR_SHUFFLE:
7283returnlowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7284caseISD::CONCAT_VECTORS: {
7285// Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7286// better than going through the stack, as the default expansion does.
7287SDLocDL(Op);
7288MVT VT =Op.getSimpleValueType();
7289MVT ContainerVT = VT;
7290if (VT.isFixedLengthVector())
7291 ContainerVT =::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7292
7293// Recursively split concat_vectors with more than 2 operands:
7294//
7295// concat_vector op1, op2, op3, op4
7296// ->
7297// concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7298//
7299// This reduces the length of the chain of vslideups and allows us to
7300// perform the vslideups at a smaller LMUL, limited to MF2.
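// Illustrative: a concat of 8 operands first becomes two concats of 4
// operands on half-width types, which may split again, so each resulting
// insert_subvector (and its vslideups) works on a narrower type than the full
// result.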
7301if (Op.getNumOperands() > 2 &&
7302 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7303MVT HalfVT = VT.getHalfNumVectorElementsVT();
7304assert(isPowerOf2_32(Op.getNumOperands()));
7305size_t HalfNumOps =Op.getNumOperands() / 2;
7306SDValueLo = DAG.getNode(ISD::CONCAT_VECTORS,DL, HalfVT,
7307Op->ops().take_front(HalfNumOps));
7308SDValueHi = DAG.getNode(ISD::CONCAT_VECTORS,DL, HalfVT,
7309Op->ops().drop_front(HalfNumOps));
7310return DAG.getNode(ISD::CONCAT_VECTORS,DL, VT,Lo,Hi);
7311 }
7312
7313unsigned NumOpElts =
7314Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7315SDValue Vec = DAG.getUNDEF(VT);
7316for (constauto &OpIdx :enumerate(Op->ops())) {
7317SDValue SubVec = OpIdx.value();
7318// Don't insert undef subvectors.
7319if (SubVec.isUndef())
7320continue;
7321 Vec =
7322 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VT, Vec, SubVec,
7323 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts,DL));
7324 }
7325return Vec;
7326 }
7327caseISD::LOAD: {
7328auto *Load = cast<LoadSDNode>(Op);
7329EVT VecTy = Load->getMemoryVT();
7330// Handle normal vector tuple load.
7331if (VecTy.isRISCVVectorTuple()) {
7332SDLocDL(Op);
7333MVT XLenVT = Subtarget.getXLenVT();
7334unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7335unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7336unsigned NumElts = Sz / (NF * 8);
7337int Log2LMUL =Log2_64(NumElts) - 3;
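// For example (a sketch; the exact numbers depend on the tuple type): a
// 4-field tuple of nxv8i8 has a known-minimum size of 4 * 64 = 256 bits, so
// NumElts = 8 and Log2LMUL = 0, i.e. each field occupies one LMUL=1 register
// and consecutive fields are VLENB bytes apart.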
7338
7339auto Flag =SDNodeFlags();
7340 Flag.setNoUnsignedWrap(true);
7341SDValue Ret = DAG.getUNDEF(VecTy);
7342SDValue BasePtr = Load->getBasePtr();
7343SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB,DL, XLenVT);
7344 VROffset =
7345 DAG.getNode(ISD::SHL,DL, XLenVT, VROffset,
7346 DAG.getConstant(std::max(Log2LMUL, 0),DL, XLenVT));
7347SmallVector<SDValue, 8> OutChains;
7348
7349// Load NF vector registers and combine them into a vector tuple.
7350for (unsigned i = 0; i < NF; ++i) {
7351SDValue LoadVal = DAG.getLoad(
7352MVT::getScalableVectorVT(MVT::i8, NumElts),DL, Load->getChain(),
7353 BasePtr,MachinePointerInfo(Load->getAddressSpace()),Align(8));
7354 OutChains.push_back(LoadVal.getValue(1));
7355 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT,DL, VecTy, Ret, LoadVal,
7356 DAG.getVectorIdxConstant(i,DL));
7357 BasePtr = DAG.getNode(ISD::ADD,DL, XLenVT, BasePtr, VROffset, Flag);
7358 }
7359return DAG.getMergeValues(
7360 {Ret, DAG.getNode(ISD::TokenFactor,DL, MVT::Other, OutChains)},DL);
7361 }
7362
7363if (auto V = expandUnalignedRVVLoad(Op, DAG))
7364return V;
7365if (Op.getValueType().isFixedLengthVector())
7366return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7367returnOp;
7368 }
7369caseISD::STORE: {
7370auto *Store = cast<StoreSDNode>(Op);
7371SDValue StoredVal = Store->getValue();
7372EVT VecTy = StoredVal.getValueType();
7373// Handle normal vector tuple store.
7374if (VecTy.isRISCVVectorTuple()) {
7375SDLocDL(Op);
7376MVT XLenVT = Subtarget.getXLenVT();
7377unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7378unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7379unsigned NumElts = Sz / (NF * 8);
7380int Log2LMUL =Log2_64(NumElts) - 3;
7381
7382auto Flag =SDNodeFlags();
7383 Flag.setNoUnsignedWrap(true);
7384SDValue Ret;
7385SDValue Chain = Store->getChain();
7386SDValue BasePtr = Store->getBasePtr();
7387SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB,DL, XLenVT);
7388 VROffset =
7389 DAG.getNode(ISD::SHL,DL, XLenVT, VROffset,
7390 DAG.getConstant(std::max(Log2LMUL, 0),DL, XLenVT));
7391
7392// Extract subregisters in a vector tuple and store them individually.
7393for (unsigned i = 0; i < NF; ++i) {
7394auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT,DL,
7395MVT::getScalableVectorVT(MVT::i8, NumElts),
7396 StoredVal, DAG.getVectorIdxConstant(i,DL));
7397 Ret = DAG.getStore(Chain,DL, Extract, BasePtr,
7398MachinePointerInfo(Store->getAddressSpace()),
7399 Store->getOriginalAlign(),
7400 Store->getMemOperand()->getFlags());
7401 Chain = Ret.getValue(0);
7402 BasePtr = DAG.getNode(ISD::ADD,DL, XLenVT, BasePtr, VROffset, Flag);
7403 }
7404return Ret;
7405 }
7406
7407if (auto V = expandUnalignedRVVStore(Op, DAG))
7408return V;
7409if (Op.getOperand(1).getValueType().isFixedLengthVector())
7410return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7411returnOp;
7412 }
7413caseISD::MLOAD:
7414case ISD::VP_LOAD:
7415return lowerMaskedLoad(Op, DAG);
7416caseISD::MSTORE:
7417case ISD::VP_STORE:
7418return lowerMaskedStore(Op, DAG);
7419caseISD::VECTOR_COMPRESS:
7420return lowerVectorCompress(Op, DAG);
7421caseISD::SELECT_CC: {
7422// This occurs because we custom legalize SETGT and SETUGT for setcc. That
7423// causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7424// into separate SETCC+SELECT just like LegalizeDAG.
7425SDValue Tmp1 =Op.getOperand(0);
7426SDValue Tmp2 =Op.getOperand(1);
7427SDValue True =Op.getOperand(2);
7428SDValue False =Op.getOperand(3);
7429EVT VT =Op.getValueType();
7430SDValueCC =Op.getOperand(4);
7431EVT CmpVT = Tmp1.getValueType();
7432EVT CCVT =
7433getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7434SDLocDL(Op);
7435SDValueCond =
7436 DAG.getNode(ISD::SETCC,DL, CCVT, Tmp1, Tmp2,CC,Op->getFlags());
7437return DAG.getSelect(DL, VT,Cond, True, False);
7438 }
7439caseISD::SETCC: {
7440MVT OpVT =Op.getOperand(0).getSimpleValueType();
7441if (OpVT.isScalarInteger()) {
7442MVT VT =Op.getSimpleValueType();
7443SDValueLHS =Op.getOperand(0);
7444SDValueRHS =Op.getOperand(1);
7445ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7446assert((CCVal ==ISD::SETGT || CCVal ==ISD::SETUGT) &&
7447"Unexpected CondCode");
7448
7449SDLocDL(Op);
7450
7451// If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7452// convert this to the equivalent of (set(u)ge X, C+1) by using
7453// (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7454// in a register.
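// For example (illustrative values only), (setugt X, 5) can be emitted as
// (xori (sltiu X, 6), 1): the sltiu computes X < 6, i.e. X <= 5, and the xori
// inverts it, so no separate constant materialization is needed.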
7455if (isa<ConstantSDNode>(RHS)) {
7456 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7457if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7458// If this is an unsigned compare and the constant is -1, incrementing
7459// the constant would change behavior. The result should be false.
7460if (CCVal ==ISD::SETUGT && Imm == -1)
7461return DAG.getConstant(0,DL, VT);
7462// Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7463 CCVal =ISD::getSetCCSwappedOperands(CCVal);
7464SDValue SetCC = DAG.getSetCC(
7465DL, VT,LHS, DAG.getSignedConstant(Imm + 1,DL, OpVT), CCVal);
7466return DAG.getLogicalNOT(DL, SetCC, VT);
7467 }
7468 }
7469
7470// Not a constant we could handle, swap the operands and condition code to
7471// SETLT/SETULT.
7472 CCVal =ISD::getSetCCSwappedOperands(CCVal);
7473return DAG.getSetCC(DL, VT,RHS,LHS, CCVal);
7474 }
7475
7476if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7477returnSplitVectorOp(Op, DAG);
7478
7479return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7480 }
7481caseISD::ADD:
7482caseISD::SUB:
7483caseISD::MUL:
7484caseISD::MULHS:
7485caseISD::MULHU:
7486caseISD::AND:
7487caseISD::OR:
7488caseISD::XOR:
7489caseISD::SDIV:
7490caseISD::SREM:
7491caseISD::UDIV:
7492caseISD::UREM:
7493caseISD::BSWAP:
7494caseISD::CTPOP:
7495return lowerToScalableOp(Op, DAG);
7496caseISD::SHL:
7497caseISD::SRA:
7498caseISD::SRL:
7499if (Op.getSimpleValueType().isFixedLengthVector())
7500return lowerToScalableOp(Op, DAG);
7501// This can be called for an i32 shift amount that needs to be promoted.
7502assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7503"Unexpected custom legalisation");
7504returnSDValue();
7505caseISD::FABS:
7506caseISD::FNEG:
7507if (Op.getValueType() == MVT::f16 ||Op.getValueType() == MVT::bf16)
7508returnlowerFABSorFNEG(Op, DAG, Subtarget);
7509 [[fallthrough]];
7510caseISD::FADD:
7511caseISD::FSUB:
7512caseISD::FMUL:
7513caseISD::FDIV:
7514caseISD::FSQRT:
7515caseISD::FMA:
7516caseISD::FMINNUM:
7517caseISD::FMAXNUM:
7518if (isPromotedOpNeedingSplit(Op, Subtarget))
7519returnSplitVectorOp(Op, DAG);
7520 [[fallthrough]];
7521caseISD::AVGFLOORS:
7522caseISD::AVGFLOORU:
7523caseISD::AVGCEILS:
7524caseISD::AVGCEILU:
7525caseISD::SMIN:
7526caseISD::SMAX:
7527caseISD::UMIN:
7528caseISD::UMAX:
7529caseISD::UADDSAT:
7530caseISD::USUBSAT:
7531caseISD::SADDSAT:
7532caseISD::SSUBSAT:
7533return lowerToScalableOp(Op, DAG);
7534caseISD::ABDS:
7535caseISD::ABDU: {
7536SDLoc dl(Op);
7537EVT VT =Op->getValueType(0);
7538SDValueLHS = DAG.getFreeze(Op->getOperand(0));
7539SDValueRHS = DAG.getFreeze(Op->getOperand(1));
7540bool IsSigned =Op->getOpcode() ==ISD::ABDS;
7541
7542// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7543// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7544unsigned MaxOpc = IsSigned ?ISD::SMAX :ISD::UMAX;
7545unsigned MinOpc = IsSigned ?ISD::SMIN :ISD::UMIN;
7546SDValue Max = DAG.getNode(MaxOpc, dl, VT,LHS,RHS);
7547SDValue Min = DAG.getNode(MinOpc, dl, VT,LHS,RHS);
7548return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7549 }
7550caseISD::ABS:
7551case ISD::VP_ABS:
7552return lowerABS(Op, DAG);
7553caseISD::CTLZ:
7554caseISD::CTLZ_ZERO_UNDEF:
7555caseISD::CTTZ:
7556caseISD::CTTZ_ZERO_UNDEF:
7557if (Subtarget.hasStdExtZvbb())
7558return lowerToScalableOp(Op, DAG);
7559assert(Op.getOpcode() !=ISD::CTTZ);
7560return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7561caseISD::VSELECT:
7562return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7563caseISD::FCOPYSIGN:
7564if (Op.getValueType() == MVT::f16 ||Op.getValueType() == MVT::bf16)
7565returnlowerFCOPYSIGN(Op, DAG, Subtarget);
7566if (isPromotedOpNeedingSplit(Op, Subtarget))
7567returnSplitVectorOp(Op, DAG);
7568return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7569caseISD::STRICT_FADD:
7570caseISD::STRICT_FSUB:
7571caseISD::STRICT_FMUL:
7572caseISD::STRICT_FDIV:
7573caseISD::STRICT_FSQRT:
7574caseISD::STRICT_FMA:
7575if (isPromotedOpNeedingSplit(Op, Subtarget))
7576returnSplitStrictFPVectorOp(Op, DAG);
7577return lowerToScalableOp(Op, DAG);
7578caseISD::STRICT_FSETCC:
7579caseISD::STRICT_FSETCCS:
7580return lowerVectorStrictFSetcc(Op, DAG);
7581caseISD::STRICT_FCEIL:
7582caseISD::STRICT_FRINT:
7583caseISD::STRICT_FFLOOR:
7584caseISD::STRICT_FTRUNC:
7585caseISD::STRICT_FNEARBYINT:
7586caseISD::STRICT_FROUND:
7587caseISD::STRICT_FROUNDEVEN:
7588returnlowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7589caseISD::MGATHER:
7590case ISD::VP_GATHER:
7591return lowerMaskedGather(Op, DAG);
7592caseISD::MSCATTER:
7593case ISD::VP_SCATTER:
7594return lowerMaskedScatter(Op, DAG);
7595caseISD::GET_ROUNDING:
7596return lowerGET_ROUNDING(Op, DAG);
7597caseISD::SET_ROUNDING:
7598return lowerSET_ROUNDING(Op, DAG);
7599caseISD::EH_DWARF_CFA:
7600return lowerEH_DWARF_CFA(Op, DAG);
7601case ISD::VP_MERGE:
7602if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7603return lowerVPMergeMask(Op, DAG);
7604 [[fallthrough]];
7605case ISD::VP_SELECT:
7606case ISD::VP_ADD:
7607case ISD::VP_SUB:
7608case ISD::VP_MUL:
7609case ISD::VP_SDIV:
7610case ISD::VP_UDIV:
7611case ISD::VP_SREM:
7612case ISD::VP_UREM:
7613case ISD::VP_UADDSAT:
7614case ISD::VP_USUBSAT:
7615case ISD::VP_SADDSAT:
7616case ISD::VP_SSUBSAT:
7617case ISD::VP_LRINT:
7618case ISD::VP_LLRINT:
7619return lowerVPOp(Op, DAG);
7620case ISD::VP_AND:
7621case ISD::VP_OR:
7622case ISD::VP_XOR:
7623return lowerLogicVPOp(Op, DAG);
7624case ISD::VP_FADD:
7625case ISD::VP_FSUB:
7626case ISD::VP_FMUL:
7627case ISD::VP_FDIV:
7628case ISD::VP_FNEG:
7629case ISD::VP_FABS:
7630case ISD::VP_SQRT:
7631case ISD::VP_FMA:
7632case ISD::VP_FMINNUM:
7633case ISD::VP_FMAXNUM:
7634case ISD::VP_FCOPYSIGN:
7635if (isPromotedOpNeedingSplit(Op, Subtarget))
7636returnSplitVPOp(Op, DAG);
7637 [[fallthrough]];
7638case ISD::VP_SRA:
7639case ISD::VP_SRL:
7640case ISD::VP_SHL:
7641return lowerVPOp(Op, DAG);
7642case ISD::VP_IS_FPCLASS:
7643return LowerIS_FPCLASS(Op, DAG);
7644case ISD::VP_SIGN_EXTEND:
7645case ISD::VP_ZERO_EXTEND:
7646if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7647return lowerVPExtMaskOp(Op, DAG);
7648return lowerVPOp(Op, DAG);
7649case ISD::VP_TRUNCATE:
7650return lowerVectorTruncLike(Op, DAG);
7651case ISD::VP_FP_EXTEND:
7652case ISD::VP_FP_ROUND:
7653return lowerVectorFPExtendOrRoundLike(Op, DAG);
7654case ISD::VP_SINT_TO_FP:
7655case ISD::VP_UINT_TO_FP:
7656if (Op.getValueType().isVector() &&
7657 ((Op.getValueType().getScalarType() == MVT::f16 &&
7658 (Subtarget.hasVInstructionsF16Minimal() &&
7659 !Subtarget.hasVInstructionsF16())) ||
7660Op.getValueType().getScalarType() == MVT::bf16)) {
7661if (isPromotedOpNeedingSplit(Op, Subtarget))
7662returnSplitVectorOp(Op, DAG);
7663// int -> f32
7664SDLocDL(Op);
7665MVT NVT =
7666MVT::getVectorVT(MVT::f32,Op.getValueType().getVectorElementCount());
7667autoNC = DAG.getNode(Op.getOpcode(),DL, NVT,Op->ops());
7668// f32 -> [b]f16
7669return DAG.getNode(ISD::FP_ROUND,DL,Op.getValueType(),NC,
7670 DAG.getIntPtrConstant(0,DL,/*isTarget=*/true));
7671 }
7672 [[fallthrough]];
7673case ISD::VP_FP_TO_SINT:
7674case ISD::VP_FP_TO_UINT:
7675if (SDValue Op1 =Op.getOperand(0);
7676 Op1.getValueType().isVector() &&
7677 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7678 (Subtarget.hasVInstructionsF16Minimal() &&
7679 !Subtarget.hasVInstructionsF16())) ||
7680 Op1.getValueType().getScalarType() == MVT::bf16)) {
7681if (isPromotedOpNeedingSplit(Op1, Subtarget))
7682returnSplitVectorOp(Op, DAG);
7683// [b]f16 -> f32
7684SDLocDL(Op);
7685MVT NVT =MVT::getVectorVT(MVT::f32,
7686 Op1.getValueType().getVectorElementCount());
7687SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND,DL, NVT, Op1);
7688// f32 -> int
7689return DAG.getNode(Op.getOpcode(),DL,Op.getValueType(),
7690 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7691 }
7692return lowerVPFPIntConvOp(Op, DAG);
7693case ISD::VP_SETCC:
7694if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7695returnSplitVPOp(Op, DAG);
7696if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7697return lowerVPSetCCMaskOp(Op, DAG);
7698 [[fallthrough]];
7699case ISD::VP_SMIN:
7700case ISD::VP_SMAX:
7701case ISD::VP_UMIN:
7702case ISD::VP_UMAX:
7703case ISD::VP_BITREVERSE:
7704case ISD::VP_BSWAP:
7705return lowerVPOp(Op, DAG);
7706case ISD::VP_CTLZ:
7707case ISD::VP_CTLZ_ZERO_UNDEF:
7708if (Subtarget.hasStdExtZvbb())
7709return lowerVPOp(Op, DAG);
7710return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7711case ISD::VP_CTTZ:
7712case ISD::VP_CTTZ_ZERO_UNDEF:
7713if (Subtarget.hasStdExtZvbb())
7714return lowerVPOp(Op, DAG);
7715return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7716case ISD::VP_CTPOP:
7717return lowerVPOp(Op, DAG);
7718case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7719return lowerVPStridedLoad(Op, DAG);
7720case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7721return lowerVPStridedStore(Op, DAG);
7722case ISD::VP_FCEIL:
7723case ISD::VP_FFLOOR:
7724case ISD::VP_FRINT:
7725case ISD::VP_FNEARBYINT:
7726case ISD::VP_FROUND:
7727case ISD::VP_FROUNDEVEN:
7728case ISD::VP_FROUNDTOZERO:
7729if (isPromotedOpNeedingSplit(Op, Subtarget))
7730returnSplitVPOp(Op, DAG);
7731returnlowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7732case ISD::VP_FMAXIMUM:
7733case ISD::VP_FMINIMUM:
7734if (isPromotedOpNeedingSplit(Op, Subtarget))
7735returnSplitVPOp(Op, DAG);
7736returnlowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7737case ISD::EXPERIMENTAL_VP_SPLICE:
7738return lowerVPSpliceExperimental(Op, DAG);
7739case ISD::EXPERIMENTAL_VP_REVERSE:
7740return lowerVPReverseExperimental(Op, DAG);
7741case ISD::EXPERIMENTAL_VP_SPLAT:
7742return lowerVPSplatExperimental(Op, DAG);
7743caseISD::CLEAR_CACHE: {
7744assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7745"llvm.clear_cache only needs custom lower on Linux targets");
7746SDLocDL(Op);
7747SDValue Flags = DAG.getConstant(0,DL, Subtarget.getXLenVT());
7748return emitFlushICache(DAG,Op.getOperand(0),Op.getOperand(1),
7749Op.getOperand(2), Flags,DL);
7750 }
7751caseISD::DYNAMIC_STACKALLOC:
7752return lowerDYNAMIC_STACKALLOC(Op, DAG);
7753caseISD::INIT_TRAMPOLINE:
7754return lowerINIT_TRAMPOLINE(Op, DAG);
7755caseISD::ADJUST_TRAMPOLINE:
7756return lowerADJUST_TRAMPOLINE(Op, DAG);
7757 }
7758}
7759
7760SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG,SDValue InChain,
7761SDValue Start,SDValueEnd,
7762SDValue Flags,SDLocDL) const{
7763 MakeLibCallOptions CallOptions;
7764 std::pair<SDValue, SDValue> CallResult =
7765makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7766 {Start,End, Flags}, CallOptions,DL, InChain);
7767
7768// This function returns void so only the out chain matters.
7769return CallResult.second;
7770}
7771
7772SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValueOp,
7773SelectionDAG &DAG) const{
7774if (!Subtarget.is64Bit())
7775llvm::report_fatal_error("Trampolines only implemented for RV64");
7776
7777// Create an MCCodeEmitter to encode instructions.
7778TargetLoweringObjectFile *TLO =getTargetMachine().getObjFileLowering();
7779assert(TLO);
7780MCContext &MCCtx = TLO->getContext();
7781
7782 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7783createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7784
7785SDValue Root =Op.getOperand(0);
7786SDValue Trmp =Op.getOperand(1);// trampoline
7787SDLoc dl(Op);
7788
7789constValue *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7790
7791// We store the following instructions and data in the trampoline buffer.
7792// Offset:
7793// 0: auipc t2, 0
7794// 4: ld t0, 24(t2)
7795// 8: ld t2, 16(t2)
7796// 12: jalr t0
7797// 16: <StaticChainOffset>
7798// 24: <FunctionAddressOffset>
7799// 32:
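// At run time, a call through the trampoline therefore loads the target
// function address into t0 and the static chain into t2, then jumps to t0;
// the whole buffer is 32 bytes. (This is only a summary of the layout above.)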
7800
7801constexprunsigned StaticChainOffset = 16;
7802constexprunsigned FunctionAddressOffset = 24;
7803
7804constMCSubtargetInfo *STI =getTargetMachine().getMCSubtargetInfo();
7805assert(STI);
7806auto GetEncoding = [&](constMCInst &MC) {
7807SmallVector<char, 4> CB;
7808SmallVector<MCFixup>Fixups;
7809 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7810uint32_t Encoding =support::endian::read32le(CB.data());
7811return Encoding;
7812 };
7813
7814SDValue OutChains[6];
7815
7816uint32_t Encodings[] = {
7817// auipc t2, 0
7818// Loads the current PC into t2.
7819 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7820// ld t0, 24(t2)
7821// Loads the function address into t0. Note that we are using offsets
7822// pc-relative to the first instruction of the trampoline.
7823 GetEncoding(
7824MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7825 FunctionAddressOffset)),
7826// ld t2, 16(t2)
7827// Load the value of the static chain.
7828 GetEncoding(
7829MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7830 StaticChainOffset)),
7831// jalr t0
7832// Jump to the function.
7833 GetEncoding(MCInstBuilder(RISCV::JALR)
7834 .addReg(RISCV::X0)
7835 .addReg(RISCV::X5)
7836 .addImm(0))};
7837
7838// Store encoded instructions.
7839for (auto [Idx, Encoding] :llvm::enumerate(Encodings)) {
7840SDValueAddr =Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7841 DAG.getConstant(Idx * 4, dl, MVT::i64))
7842 : Trmp;
7843 OutChains[Idx] = DAG.getTruncStore(
7844 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64),Addr,
7845MachinePointerInfo(TrmpAddr,Idx * 4), MVT::i32);
7846 }
7847
7848// Now store the variable part of the trampoline.
7849SDValue FunctionAddress =Op.getOperand(2);
7850SDValue StaticChain =Op.getOperand(3);
7851
7852// Store the given static chain and function pointer in the trampoline buffer.
7853structOffsetValuePair {
7854constunsignedOffset;
7855constSDValueValue;
7856SDValueAddr =SDValue();// Used to cache the address.
7857 } OffsetValues[] = {
7858 {StaticChainOffset, StaticChain},
7859 {FunctionAddressOffset, FunctionAddress},
7860 };
7861for (auto [Idx, OffsetValue] :llvm::enumerate(OffsetValues)) {
7862SDValueAddr =
7863 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7864 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7865 OffsetValue.Addr =Addr;
7866 OutChains[Idx + 4] =
7867 DAG.getStore(Root, dl, OffsetValue.Value,Addr,
7868MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7869 }
7870
7871SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7872
7873// The end of the trampoline's instructions coincides with the static chain
7874// address that we computed earlier.
7875SDValue EndOfTrmp = OffsetValues[0].Addr;
7876
7877// Call clear cache on the trampoline instructions.
7878SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7879 Trmp, EndOfTrmp);
7880
7881return Chain;
7882}
7883
7884SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValueOp,
7885SelectionDAG &DAG) const{
7886if (!Subtarget.is64Bit())
7887llvm::report_fatal_error("Trampolines only implemented for RV64");
7888
7889returnOp.getOperand(0);
7890}
7891
7892staticSDValuegetTargetNode(GlobalAddressSDNode *N,constSDLoc &DL,EVT Ty,
7893SelectionDAG &DAG,unsigned Flags) {
7894return DAG.getTargetGlobalAddress(N->getGlobal(),DL, Ty, 0, Flags);
7895}
7896
7897staticSDValuegetTargetNode(BlockAddressSDNode *N,constSDLoc &DL,EVT Ty,
7898SelectionDAG &DAG,unsigned Flags) {
7899return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty,N->getOffset(),
7900 Flags);
7901}
7902
7903staticSDValuegetTargetNode(ConstantPoolSDNode *N,constSDLoc &DL,EVT Ty,
7904SelectionDAG &DAG,unsigned Flags) {
7905return DAG.getTargetConstantPool(N->getConstVal(), Ty,N->getAlign(),
7906N->getOffset(), Flags);
7907}
7908
7909staticSDValuegetTargetNode(JumpTableSDNode *N,constSDLoc &DL,EVT Ty,
7910SelectionDAG &DAG,unsigned Flags) {
7911return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7912}
7913
7914staticSDValuegetLargeGlobalAddress(GlobalAddressSDNode *N,constSDLoc &DL,
7915EVT Ty,SelectionDAG &DAG) {
7916RISCVConstantPoolValue *CPV =RISCVConstantPoolValue::Create(N->getGlobal());
7917SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty,Align(8));
7918SDValue LC = DAG.getNode(RISCVISD::LLA,DL, Ty, CPAddr);
7919return DAG.getLoad(
7920 Ty,DL, DAG.getEntryNode(), LC,
7921MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7922}
7923
7924staticSDValuegetLargeExternalSymbol(ExternalSymbolSDNode *N,constSDLoc &DL,
7925EVT Ty,SelectionDAG &DAG) {
7926RISCVConstantPoolValue *CPV =
7927RISCVConstantPoolValue::Create(*DAG.getContext(),N->getSymbol());
7928SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty,Align(8));
7929SDValue LC = DAG.getNode(RISCVISD::LLA,DL, Ty, CPAddr);
7930return DAG.getLoad(
7931 Ty,DL, DAG.getEntryNode(), LC,
7932MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7933}
7934
7935template <class NodeTy>
7936SDValue RISCVTargetLowering::getAddr(NodeTy *N,SelectionDAG &DAG,
7937bool IsLocal,bool IsExternWeak) const{
7938SDLocDL(N);
7939EVT Ty =getPointerTy(DAG.getDataLayout());
7940
7941// When HWASAN is used and tagging of global variables is enabled, globals
7942// should be accessed via the GOT, since the tagged address of a global is
7943// incompatible with existing code models. This also applies to non-pic
7944// mode.
7945if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7946SDValueAddr =getTargetNode(N,DL, Ty, DAG, 0);
7947if (IsLocal && !Subtarget.allowTaggedGlobals())
7948// Use PC-relative addressing to access the symbol. This generates the
7949// pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7950// %pcrel_lo(auipc)).
7951return DAG.getNode(RISCVISD::LLA,DL, Ty,Addr);
7952
7953// Use PC-relative addressing to access the GOT for this symbol, then load
7954// the address from the GOT. This generates the pattern (PseudoLGA sym),
7955// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7956SDValueLoad =
7957SDValue(DAG.getMachineNode(RISCV::PseudoLGA,DL, Ty,Addr), 0);
7958MachineFunction &MF = DAG.getMachineFunction();
7959MachineMemOperand *MemOp = MF.getMachineMemOperand(
7960MachinePointerInfo::getGOT(MF),
7961MachineMemOperand::MOLoad |MachineMemOperand::MODereferenceable |
7962MachineMemOperand::MOInvariant,
7963LLT(Ty.getSimpleVT()),Align(Ty.getFixedSizeInBits() / 8));
7964 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7965returnLoad;
7966 }
7967
7968switch (getTargetMachine().getCodeModel()) {
7969default:
7970report_fatal_error("Unsupported code model for lowering");
7971caseCodeModel::Small: {
7972// Generate a sequence for accessing addresses within the first 2 GiB of
7973// address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
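// As assembly this is roughly (a sketch; the register choice is illustrative):
//   lui  a0, %hi(sym)
//   addi a0, a0, %lo(sym)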
7974SDValue AddrHi =getTargetNode(N,DL, Ty, DAG,RISCVII::MO_HI);
7975SDValue AddrLo =getTargetNode(N,DL, Ty, DAG,RISCVII::MO_LO);
7976SDValue MNHi = DAG.getNode(RISCVISD::HI,DL, Ty, AddrHi);
7977return DAG.getNode(RISCVISD::ADD_LO,DL, Ty, MNHi, AddrLo);
7978 }
7979caseCodeModel::Medium: {
7980SDValueAddr =getTargetNode(N,DL, Ty, DAG, 0);
7981if (IsExternWeak) {
7982// An extern weak symbol may be undefined, i.e. have value 0, which may
7983// not be within 2GiB of PC, so use GOT-indirect addressing to access the
7984// symbol. This generates the pattern (PseudoLGA sym), which expands to
7985// (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7986SDValueLoad =
7987SDValue(DAG.getMachineNode(RISCV::PseudoLGA,DL, Ty,Addr), 0);
7988MachineFunction &MF = DAG.getMachineFunction();
7989MachineMemOperand *MemOp = MF.getMachineMemOperand(
7990MachinePointerInfo::getGOT(MF),
7991MachineMemOperand::MOLoad |MachineMemOperand::MODereferenceable |
7992MachineMemOperand::MOInvariant,
7993LLT(Ty.getSimpleVT()),Align(Ty.getFixedSizeInBits() / 8));
7994 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7995returnLoad;
7996 }
7997
7998// Generate a sequence for accessing addresses within any 2GiB range within
7999// the address space. This generates the pattern (PseudoLLA sym), which
8000// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8001return DAG.getNode(RISCVISD::LLA,DL, Ty,Addr);
8002 }
8003caseCodeModel::Large: {
8004if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8005returngetLargeGlobalAddress(G,DL, Ty, DAG);
8006
8007// Use PC-relative addressing for other node types.
8008SDValueAddr =getTargetNode(N,DL, Ty, DAG, 0);
8009return DAG.getNode(RISCVISD::LLA,DL, Ty,Addr);
8010 }
8011 }
8012}
8013
8014SDValue RISCVTargetLowering::lowerGlobalAddress(SDValueOp,
8015SelectionDAG &DAG) const{
8016GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8017assert(N->getOffset() == 0 &&"unexpected offset in global node");
8018constGlobalValue *GV =N->getGlobal();
8019return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8020}
8021
8022SDValue RISCVTargetLowering::lowerBlockAddress(SDValueOp,
8023SelectionDAG &DAG) const{
8024BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8025
8026return getAddr(N, DAG);
8027}
8028
8029SDValue RISCVTargetLowering::lowerConstantPool(SDValueOp,
8030SelectionDAG &DAG) const{
8031ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8032
8033return getAddr(N, DAG);
8034}
8035
8036SDValue RISCVTargetLowering::lowerJumpTable(SDValueOp,
8037SelectionDAG &DAG) const{
8038JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8039
8040return getAddr(N, DAG);
8041}
8042
8043SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8044SelectionDAG &DAG,
8045bool UseGOT) const{
8046SDLocDL(N);
8047EVT Ty =getPointerTy(DAG.getDataLayout());
8048constGlobalValue *GV =N->getGlobal();
8049MVT XLenVT = Subtarget.getXLenVT();
8050
8051if (UseGOT) {
8052// Use PC-relative addressing to access the GOT for this TLS symbol, then
8053// load the address from the GOT and add the thread pointer. This generates
8054// the pattern (PseudoLA_TLS_IE sym), which expands to
8055// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8056SDValueAddr = DAG.getTargetGlobalAddress(GV,DL, Ty, 0, 0);
8057SDValueLoad =
8058SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE,DL, Ty,Addr), 0);
8059MachineFunction &MF = DAG.getMachineFunction();
8060MachineMemOperand *MemOp = MF.getMachineMemOperand(
8061MachinePointerInfo::getGOT(MF),
8062MachineMemOperand::MOLoad |MachineMemOperand::MODereferenceable |
8063MachineMemOperand::MOInvariant,
8064LLT(Ty.getSimpleVT()),Align(Ty.getFixedSizeInBits() / 8));
8065 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8066
8067// Add the thread pointer.
8068SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8069return DAG.getNode(ISD::ADD,DL, Ty, Load, TPReg);
8070 }
8071
8072// Generate a sequence for accessing the address relative to the thread
8073// pointer, with the appropriate adjustment for the thread pointer offset.
8074// This generates the pattern
8075// (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
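// As assembly this is roughly (a sketch; the register choice is illustrative):
//   lui   a0, %tprel_hi(sym)
//   add   a0, a0, tp, %tprel_add(sym)
//   addi  a0, a0, %tprel_lo(sym)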
8076SDValue AddrHi =
8077 DAG.getTargetGlobalAddress(GV,DL, Ty, 0,RISCVII::MO_TPREL_HI);
8078SDValue AddrAdd =
8079 DAG.getTargetGlobalAddress(GV,DL, Ty, 0,RISCVII::MO_TPREL_ADD);
8080SDValue AddrLo =
8081 DAG.getTargetGlobalAddress(GV,DL, Ty, 0,RISCVII::MO_TPREL_LO);
8082
8083SDValue MNHi = DAG.getNode(RISCVISD::HI,DL, Ty, AddrHi);
8084SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8085SDValue MNAdd =
8086 DAG.getNode(RISCVISD::ADD_TPREL,DL, Ty, MNHi, TPReg, AddrAdd);
8087return DAG.getNode(RISCVISD::ADD_LO,DL, Ty, MNAdd, AddrLo);
8088}
8089
8090SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8091SelectionDAG &DAG) const{
8092SDLocDL(N);
8093EVT Ty =getPointerTy(DAG.getDataLayout());
8094IntegerType *CallTy =Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8095constGlobalValue *GV =N->getGlobal();
8096
8097// Use a PC-relative addressing mode to access the global dynamic GOT address.
8098// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8099// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8100SDValueAddr = DAG.getTargetGlobalAddress(GV,DL, Ty, 0, 0);
8101SDValueLoad =
8102SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD,DL, Ty,Addr), 0);
8103
8104// Prepare argument list to generate call.
8105ArgListTyArgs;
8106 ArgListEntryEntry;
8107Entry.Node =Load;
8108Entry.Ty = CallTy;
8109Args.push_back(Entry);
8110
8111// Setup call to __tls_get_addr.
8112TargetLowering::CallLoweringInfo CLI(DAG);
8113 CLI.setDebugLoc(DL)
8114 .setChain(DAG.getEntryNode())
8115 .setLibCallee(CallingConv::C, CallTy,
8116 DAG.getExternalSymbol("__tls_get_addr", Ty),
8117 std::move(Args));
8118
8119returnLowerCallTo(CLI).first;
8120}
8121
8122SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8123SelectionDAG &DAG) const{
8124SDLocDL(N);
8125EVT Ty =getPointerTy(DAG.getDataLayout());
8126constGlobalValue *GV =N->getGlobal();
8127
8128// Use a PC-relative addressing mode to access the global dynamic GOT address.
8129// This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8130//
8131// auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8132// lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8133// addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8134// jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8135SDValueAddr = DAG.getTargetGlobalAddress(GV,DL, Ty, 0, 0);
8136returnSDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC,DL, Ty,Addr), 0);
8137}
8138
8139SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValueOp,
8140SelectionDAG &DAG) const{
8141GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8142assert(N->getOffset() == 0 &&"unexpected offset in global node");
8143
8144if (DAG.getTarget().useEmulatedTLS())
8145returnLowerToTLSEmulatedModel(N, DAG);
8146
8147TLSModel::ModelModel =getTargetMachine().getTLSModel(N->getGlobal());
8148
8149if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8150CallingConv::GHC)
8151report_fatal_error("In GHC calling convention TLS is not supported");
8152
8153SDValueAddr;
8154switch (Model) {
8155caseTLSModel::LocalExec:
8156Addr = getStaticTLSAddr(N, DAG,/*UseGOT=*/false);
8157break;
8158caseTLSModel::InitialExec:
8159Addr = getStaticTLSAddr(N, DAG,/*UseGOT=*/true);
8160break;
8161caseTLSModel::LocalDynamic:
8162caseTLSModel::GeneralDynamic:
8163Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8164 : getDynamicTLSAddr(N, DAG);
8165break;
8166 }
8167
8168returnAddr;
8169}
8170
8171// Return true if Val is equal to (setcc LHS, RHS, CC).
8172// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8173// Otherwise, return std::nullopt.
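// For example (with illustrative operands): matchSetCC(a, b, setlt,
// (setcc a, b, setlt)) returns true, matchSetCC(a, b, setlt,
// (setcc a, b, setge)) returns false, and anything else returns std::nullopt.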
8174static std::optional<bool>matchSetCC(SDValue LHS,SDValue RHS,
8175ISD::CondCodeCC,SDValue Val) {
8176assert(Val->getOpcode() ==ISD::SETCC);
8177SDValue LHS2 = Val.getOperand(0);
8178SDValue RHS2 = Val.getOperand(1);
8179ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8180
8181if (LHS == LHS2 &&RHS == RHS2) {
8182if (CC == CC2)
8183returntrue;
8184if (CC ==ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8185returnfalse;
8186 }elseif (LHS == RHS2 &&RHS == LHS2) {
8187 CC2 =ISD::getSetCCSwappedOperands(CC2);
8188if (CC == CC2)
8189returntrue;
8190if (CC ==ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8191returnfalse;
8192 }
8193
8194return std::nullopt;
8195}
8196
8197staticSDValuecombineSelectToBinOp(SDNode *N,SelectionDAG &DAG,
8198constRISCVSubtarget &Subtarget) {
8199SDValue CondV =N->getOperand(0);
8200SDValue TrueV =N->getOperand(1);
8201SDValue FalseV =N->getOperand(2);
8202MVT VT =N->getSimpleValueType(0);
8203SDLocDL(N);
8204
8205if (!Subtarget.hasConditionalMoveFusion()) {
8206// (select c, -1, y) -> -c | y
8207if (isAllOnesConstant(TrueV)) {
8208SDValue Neg = DAG.getNegative(CondV,DL, VT);
8209return DAG.getNode(ISD::OR,DL, VT, Neg, DAG.getFreeze(FalseV));
8210 }
8211// (select c, y, -1) -> (c-1) | y
8212if (isAllOnesConstant(FalseV)) {
8213SDValue Neg = DAG.getNode(ISD::ADD,DL, VT, CondV,
8214 DAG.getAllOnesConstant(DL, VT));
8215return DAG.getNode(ISD::OR,DL, VT, Neg, DAG.getFreeze(TrueV));
8216 }
8217
8218// (select c, 0, y) -> (c-1) & y
8219if (isNullConstant(TrueV)) {
8220SDValue Neg = DAG.getNode(ISD::ADD,DL, VT, CondV,
8221 DAG.getAllOnesConstant(DL, VT));
8222return DAG.getNode(ISD::AND,DL, VT, Neg, DAG.getFreeze(FalseV));
8223 }
8224// (select c, y, 0) -> -c & y
8225if (isNullConstant(FalseV)) {
8226SDValue Neg = DAG.getNegative(CondV,DL, VT);
8227return DAG.getNode(ISD::AND,DL, VT, Neg, DAG.getFreeze(TrueV));
8228 }
8229 }
8230
8231// select c, ~x, x --> xor -c, x
8232if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8233constAPInt &TrueVal = TrueV->getAsAPIntVal();
8234constAPInt &FalseVal = FalseV->getAsAPIntVal();
8235if (~TrueVal == FalseVal) {
8236SDValue Neg = DAG.getNegative(CondV,DL, VT);
8237return DAG.getNode(ISD::XOR,DL, VT, Neg, FalseV);
8238 }
8239 }
8240
8241// Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8242// when both truev and falsev are also setcc.
8243if (CondV.getOpcode() ==ISD::SETCC && TrueV.getOpcode() ==ISD::SETCC &&
8244 FalseV.getOpcode() ==ISD::SETCC) {
8245SDValueLHS = CondV.getOperand(0);
8246SDValueRHS = CondV.getOperand(1);
8247ISD::CondCodeCC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8248
8249// (select x, x, y) -> x | y
8250// (select !x, x, y) -> x & y
8251if (std::optional<bool> MatchResult =matchSetCC(LHS,RHS,CC, TrueV)) {
8252return DAG.getNode(*MatchResult ?ISD::OR :ISD::AND,DL, VT, TrueV,
8253 DAG.getFreeze(FalseV));
8254 }
8255// (select x, y, x) -> x & y
8256// (select !x, y, x) -> x | y
8257if (std::optional<bool> MatchResult =matchSetCC(LHS,RHS,CC, FalseV)) {
8258return DAG.getNode(*MatchResult ?ISD::AND :ISD::OR,DL, VT,
8259 DAG.getFreeze(TrueV), FalseV);
8260 }
8261 }
8262
8263returnSDValue();
8264}
8265
8266// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8267// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8268// For now we only consider the transformation profitable if `binOp(c0, c1)`
8269// ends up being `0` or `-1`. In such cases we can replace the `select` with `and`.
8270// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8271// than `c0`?
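// For example (hypothetical constants): `xor (select cond, x, 7), 7` becomes
// `select cond, (xor x, 7), 0`, and a select whose false arm is 0 can then be
// lowered with a mask or czero sequence instead of a branch.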
8272staticSDValue
8273foldBinOpIntoSelectIfProfitable(SDNode *BO,SelectionDAG &DAG,
8274constRISCVSubtarget &Subtarget) {
8275if (Subtarget.hasShortForwardBranchOpt())
8276returnSDValue();
8277
8278unsigned SelOpNo = 0;
8279SDValue Sel = BO->getOperand(0);
8280if (Sel.getOpcode() !=ISD::SELECT || !Sel.hasOneUse()) {
8281 SelOpNo = 1;
8282 Sel = BO->getOperand(1);
8283 }
8284
8285if (Sel.getOpcode() !=ISD::SELECT || !Sel.hasOneUse())
8286returnSDValue();
8287
8288unsigned ConstSelOpNo = 1;
8289unsigned OtherSelOpNo = 2;
8290if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8291 ConstSelOpNo = 2;
8292 OtherSelOpNo = 1;
8293 }
8294SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8295ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8296if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8297returnSDValue();
8298
8299SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8300ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8301if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8302returnSDValue();
8303
8304SDLocDL(Sel);
8305EVT VT = BO->getValueType(0);
8306
8307SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8308if (SelOpNo == 1)
8309std::swap(NewConstOps[0], NewConstOps[1]);
8310
8311SDValue NewConstOp =
8312 DAG.FoldConstantArithmetic(BO->getOpcode(),DL, VT, NewConstOps);
8313if (!NewConstOp)
8314returnSDValue();
8315
8316constAPInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8317if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8318returnSDValue();
8319
8320SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8321SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8322if (SelOpNo == 1)
8323std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8324SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(),DL, VT, NewNonConstOps);
8325
8326SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8327SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8328return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8329}
8330
8331SDValue RISCVTargetLowering::lowerSELECT(SDValueOp,SelectionDAG &DAG) const{
8332SDValue CondV =Op.getOperand(0);
8333SDValue TrueV =Op.getOperand(1);
8334SDValue FalseV =Op.getOperand(2);
8335SDLocDL(Op);
8336MVT VT =Op.getSimpleValueType();
8337MVT XLenVT = Subtarget.getXLenVT();
8338
8339// Lower vector SELECTs to VSELECTs by splatting the condition.
8340if (VT.isVector()) {
8341MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8342SDValue CondSplat = DAG.getSplat(SplatCondVT,DL, CondV);
8343return DAG.getNode(ISD::VSELECT,DL, VT, CondSplat, TrueV, FalseV);
8344 }
8345
8346// When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8347// nodes to implement the SELECT. Performing the lowering here allows for
8348// greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8349// sequence or RISCVISD::SELECT_CC node (branch-based select).
8350if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8351 VT.isScalarInteger()) {
8352// (select c, t, 0) -> (czero_eqz t, c)
8353if (isNullConstant(FalseV))
8354return DAG.getNode(RISCVISD::CZERO_EQZ,DL, VT, TrueV, CondV);
8355// (select c, 0, f) -> (czero_nez f, c)
8356if (isNullConstant(TrueV))
8357return DAG.getNode(RISCVISD::CZERO_NEZ,DL, VT, FalseV, CondV);
8358
8359// (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8360if (TrueV.getOpcode() ==ISD::AND &&
8361 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8362return DAG.getNode(
8363ISD::OR,DL, VT, TrueV,
8364 DAG.getNode(RISCVISD::CZERO_NEZ,DL, VT, FalseV, CondV));
8365// (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8366if (FalseV.getOpcode() ==ISD::AND &&
8367 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8368return DAG.getNode(
8369ISD::OR,DL, VT, FalseV,
8370 DAG.getNode(RISCVISD::CZERO_EQZ,DL, VT, TrueV, CondV));
8371
8372// Try some other optimizations before falling back to generic lowering.
8373if (SDValue V =combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8374returnV;
8375
8376// (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8377// (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
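// For example (illustrative constants): (select c, 5, 3) can become
// (add (czero_nez (3 - 5), c), 5): when c is non-zero the czero_nez result is
// 0 and the select yields 5; otherwise it yields (3 - 5) + 5 = 3.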
8378if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8379constAPInt &TrueVal = TrueV->getAsAPIntVal();
8380constAPInt &FalseVal = FalseV->getAsAPIntVal();
8381constint TrueValCost =RISCVMatInt::getIntMatCost(
8382 TrueVal, Subtarget.getXLen(), Subtarget,/*CompressionCost=*/true);
8383constint FalseValCost =RISCVMatInt::getIntMatCost(
8384 FalseVal, Subtarget.getXLen(), Subtarget,/*CompressionCost=*/true);
8385bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8386SDValue LHSVal = DAG.getConstant(
8387 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal,DL, VT);
8388SDValue RHSVal =
8389 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal,DL, VT);
8390SDValueCMOV =
8391 DAG.getNode(IsCZERO_NEZ ?RISCVISD::CZERO_NEZ :RISCVISD::CZERO_EQZ,
8392DL, VT, LHSVal, CondV);
8393return DAG.getNode(ISD::ADD,DL, VT, CMOV, RHSVal);
8394 }
8395
8396// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8397// Unless we have the short forward branch optimization.
8398if (!Subtarget.hasConditionalMoveFusion())
8399return DAG.getNode(
8400ISD::OR,DL, VT,
8401 DAG.getNode(RISCVISD::CZERO_EQZ,DL, VT, TrueV, CondV),
8402 DAG.getNode(RISCVISD::CZERO_NEZ,DL, VT, FalseV, CondV));
8403 }
8404
8405if (SDValue V =combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8406returnV;
8407
8408if (Op.hasOneUse()) {
8409unsigned UseOpc =Op->user_begin()->getOpcode();
8410if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8411SDNode *BinOp = *Op->user_begin();
8412if (SDValue NewSel =foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8413 DAG, Subtarget)) {
8414 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8415// The opcode check is necessary because foldBinOpIntoSelectIfProfitable
8416// may return a constant node and cause a crash in lowerSELECT.
8417if (NewSel.getOpcode() ==ISD::SELECT)
8418return lowerSELECT(NewSel, DAG);
8419return NewSel;
8420 }
8421 }
8422 }
8423
8424// (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8425// (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8426constConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8427constConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8428if (FPTV && FPFV) {
8429if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8430return DAG.getNode(ISD::SINT_TO_FP,DL, VT, CondV);
8431if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8432SDValueXOR = DAG.getNode(ISD::XOR,DL, XLenVT, CondV,
8433 DAG.getConstant(1,DL, XLenVT));
8434return DAG.getNode(ISD::SINT_TO_FP,DL, VT, XOR);
8435 }
8436 }
8437
8438// If the condition is not an integer SETCC which operates on XLenVT, we need
8439// to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8440// (select condv, truev, falsev)
8441// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8442if (CondV.getOpcode() !=ISD::SETCC ||
8443 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8444SDValueZero = DAG.getConstant(0,DL, XLenVT);
8445SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8446
8447SDValue Ops[] = {CondV,Zero, SetNE, TrueV, FalseV};
8448
8449return DAG.getNode(RISCVISD::SELECT_CC,DL, VT, Ops);
8450 }
8451
8452// If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8453// then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8454// advantage of the integer compare+branch instructions. i.e.:
8455// (select (setcc lhs, rhs, cc), truev, falsev)
8456// -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8457SDValueLHS = CondV.getOperand(0);
8458SDValueRHS = CondV.getOperand(1);
8459ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8460
8461// Special case for a select of 2 constants that have a difference of 1.
8462// Normally this is done by DAGCombine, but if the select is introduced by
8463// type legalization or op legalization, we miss it. Restricting to the SETLT
8464// case for now because that is what signed saturating add/sub need.
8465// FIXME: We don't need the condition to be SETLT or even a SETCC,
8466// but we would probably want to swap the true/false values if the condition
8467// is SETGE/SETLE to avoid an XORI.
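// For example (illustrative constants): (select (setlt a, b), 4, 3) can be
// emitted as (add (setlt a, b), 3), since the setcc result is 0 or 1.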
8468if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8469 CCVal ==ISD::SETLT) {
8470constAPInt &TrueVal = TrueV->getAsAPIntVal();
8471constAPInt &FalseVal = FalseV->getAsAPIntVal();
8472if (TrueVal - 1 == FalseVal)
8473return DAG.getNode(ISD::ADD,DL, VT, CondV, FalseV);
8474if (TrueVal + 1 == FalseVal)
8475return DAG.getNode(ISD::SUB,DL, VT, FalseV, CondV);
8476 }
8477
8478translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8479// 1 < x ? x : 1 -> 0 < x ? x : 1
8480if (isOneConstant(LHS) && (CCVal ==ISD::SETLT || CCVal ==ISD::SETULT) &&
8481 RHS == TrueV && LHS == FalseV) {
8482LHS = DAG.getConstant(0,DL, VT);
8483// 0 <u x is the same as x != 0.
8484if (CCVal ==ISD::SETULT) {
8485std::swap(LHS, RHS);
8486 CCVal =ISD::SETNE;
8487 }
8488 }
8489
8490// x <s -1 ? x : -1 -> x <s 0 ? x : -1
8491if (isAllOnesConstant(RHS) && CCVal ==ISD::SETLT && LHS == TrueV &&
8492 RHS == FalseV) {
8493RHS = DAG.getConstant(0,DL, VT);
8494 }
8495
8496SDValue TargetCC = DAG.getCondCode(CCVal);
8497
8498if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8499// (select (setcc lhs, rhs, CC), constant, falsev)
8500// -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8501std::swap(TrueV, FalseV);
8502 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal,LHS.getValueType()));
8503 }
8504
8505SDValue Ops[] = {LHS,RHS, TargetCC, TrueV, FalseV};
8506return DAG.getNode(RISCVISD::SELECT_CC,DL, VT, Ops);
8507}
8508
8509SDValue RISCVTargetLowering::lowerBRCOND(SDValueOp,SelectionDAG &DAG) const{
8510SDValue CondV =Op.getOperand(1);
8511SDLocDL(Op);
8512MVT XLenVT = Subtarget.getXLenVT();
8513
8514if (CondV.getOpcode() ==ISD::SETCC &&
8515 CondV.getOperand(0).getValueType() == XLenVT) {
8516SDValueLHS = CondV.getOperand(0);
8517SDValueRHS = CondV.getOperand(1);
8518ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8519
8520translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8521
8522SDValue TargetCC = DAG.getCondCode(CCVal);
8523return DAG.getNode(RISCVISD::BR_CC,DL,Op.getValueType(),Op.getOperand(0),
8524 LHS, RHS, TargetCC,Op.getOperand(2));
8525 }
8526
8527return DAG.getNode(RISCVISD::BR_CC,DL,Op.getValueType(),Op.getOperand(0),
8528 CondV, DAG.getConstant(0,DL, XLenVT),
8529 DAG.getCondCode(ISD::SETNE),Op.getOperand(2));
8530}
8531
8532SDValue RISCVTargetLowering::lowerVASTART(SDValueOp,SelectionDAG &DAG) const{
8533MachineFunction &MF = DAG.getMachineFunction();
8534RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8535
8536SDLocDL(Op);
8537SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8538getPointerTy(MF.getDataLayout()));
8539
8540// vastart just stores the address of the VarArgsFrameIndex slot into the
8541// memory location argument.
8542constValue *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8543return DAG.getStore(Op.getOperand(0),DL, FI,Op.getOperand(1),
8544MachinePointerInfo(SV));
8545}
8546
8547SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValueOp,
8548SelectionDAG &DAG) const{
8549constRISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8550MachineFunction &MF = DAG.getMachineFunction();
8551MachineFrameInfo &MFI = MF.getFrameInfo();
8552 MFI.setFrameAddressIsTaken(true);
8553Register FrameReg = RI.getFrameRegister(MF);
8554int XLenInBytes = Subtarget.getXLen() / 8;
8555
8556EVT VT =Op.getValueType();
8557SDLocDL(Op);
8558SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(),DL, FrameReg, VT);
8559unsignedDepth =Op.getConstantOperandVal(0);
8560while (Depth--) {
8561intOffset = -(XLenInBytes * 2);
8562SDValuePtr = DAG.getNode(
8563ISD::ADD,DL, VT, FrameAddr,
8564 DAG.getSignedConstant(Offset,DL,getPointerTy(DAG.getDataLayout())));
8565 FrameAddr =
8566 DAG.getLoad(VT,DL, DAG.getEntryNode(),Ptr,MachinePointerInfo());
8567 }
8568return FrameAddr;
8569}
8570
8571SDValue RISCVTargetLowering::lowerRETURNADDR(SDValueOp,
8572SelectionDAG &DAG) const{
8573constRISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8574MachineFunction &MF = DAG.getMachineFunction();
8575MachineFrameInfo &MFI = MF.getFrameInfo();
8576 MFI.setReturnAddressIsTaken(true);
8577MVT XLenVT = Subtarget.getXLenVT();
8578int XLenInBytes = Subtarget.getXLen() / 8;
8579
8580if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8581returnSDValue();
8582
8583EVT VT =Op.getValueType();
8584SDLocDL(Op);
8585unsignedDepth =Op.getConstantOperandVal(0);
8586if (Depth) {
8587intOff = -XLenInBytes;
8588SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8589SDValueOffset = DAG.getSignedConstant(Off,DL, VT);
8590return DAG.getLoad(VT,DL, DAG.getEntryNode(),
8591 DAG.getNode(ISD::ADD,DL, VT, FrameAddr,Offset),
8592MachinePointerInfo());
8593 }
8594
8595// Return the value of the return address register, marking it an implicit
8596// live-in.
8597RegisterReg = MF.addLiveIn(RI.getRARegister(),getRegClassFor(XLenVT));
8598return DAG.getCopyFromReg(DAG.getEntryNode(),DL, Reg, XLenVT);
8599}
8600
8601SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValueOp,
8602SelectionDAG &DAG) const{
8603SDLocDL(Op);
8604SDValueLo =Op.getOperand(0);
8605SDValueHi =Op.getOperand(1);
8606SDValue Shamt =Op.getOperand(2);
8607EVT VT =Lo.getValueType();
8608
8609// if Shamt-XLEN < 0: // Shamt < XLEN
8610// Lo = Lo << Shamt
8611// Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8612// else:
8613// Lo = 0
8614// Hi = Lo << (Shamt-XLEN)
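// For example (a sketch with XLEN = 32): for Shamt = 40, Lo becomes 0 and Hi
// becomes the original Lo shifted left by 8; for Shamt = 4, Lo is shifted by 4
// and Hi receives the 4 bits shifted out of the top of Lo.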
8615
8616SDValueZero = DAG.getConstant(0,DL, VT);
8617SDValue One = DAG.getConstant(1,DL, VT);
8618SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(),DL, VT);
8619SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1,DL, VT);
8620SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD,DL, VT, Shamt, MinusXLen);
8621SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB,DL, VT, XLenMinus1, Shamt);
8622
8623SDValue LoTrue = DAG.getNode(ISD::SHL,DL, VT,Lo, Shamt);
8624SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL,DL, VT,Lo, One);
8625SDValue ShiftRightLo =
8626 DAG.getNode(ISD::SRL,DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8627SDValue ShiftLeftHi = DAG.getNode(ISD::SHL,DL, VT,Hi, Shamt);
8628SDValue HiTrue = DAG.getNode(ISD::OR,DL, VT, ShiftLeftHi, ShiftRightLo);
8629SDValue HiFalse = DAG.getNode(ISD::SHL,DL, VT,Lo, ShamtMinusXLen);
8630
8631SDValueCC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero,ISD::SETLT);
8632
8633Lo = DAG.getNode(ISD::SELECT,DL, VT,CC, LoTrue, Zero);
8634Hi = DAG.getNode(ISD::SELECT,DL, VT,CC, HiTrue, HiFalse);
8635
8636SDValue Parts[2] = {Lo,Hi};
8637return DAG.getMergeValues(Parts,DL);
8638}
8639
8640SDValue RISCVTargetLowering::lowerShiftRightParts(SDValueOp,SelectionDAG &DAG,
8641bool IsSRA) const{
8642SDLocDL(Op);
8643SDValueLo =Op.getOperand(0);
8644SDValueHi =Op.getOperand(1);
8645SDValue Shamt =Op.getOperand(2);
8646EVT VT =Lo.getValueType();
8647
8648// SRA expansion:
8649// if Shamt-XLEN < 0: // Shamt < XLEN
8650// Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8651// Hi = Hi >>s Shamt
8652// else:
8653// Lo = Hi >>s (Shamt-XLEN);
8654// Hi = Hi >>s (XLEN-1)
8655//
8656// SRL expansion:
8657// if Shamt-XLEN < 0: // Shamt < XLEN
8658// Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8659// Hi = Hi >>u Shamt
8660// else:
8661// Lo = Hi >>u (Shamt-XLEN);
8662// Hi = 0;
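// For example (a sketch with XLEN = 32): an SRL with Shamt = 36 yields
// Lo = Hi >>u 4 and Hi = 0, while Shamt = 4 shifts the low 4 bits of Hi into
// the top of Lo.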
8663
8664unsigned ShiftRightOp = IsSRA ?ISD::SRA :ISD::SRL;
8665
8666SDValueZero = DAG.getConstant(0,DL, VT);
8667SDValue One = DAG.getConstant(1,DL, VT);
8668SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(),DL, VT);
8669SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1,DL, VT);
8670SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD,DL, VT, Shamt, MinusXLen);
8671SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB,DL, VT, XLenMinus1, Shamt);
8672
8673SDValue ShiftRightLo = DAG.getNode(ISD::SRL,DL, VT,Lo, Shamt);
8674SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL,DL, VT,Hi, One);
8675SDValue ShiftLeftHi =
8676 DAG.getNode(ISD::SHL,DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8677SDValue LoTrue = DAG.getNode(ISD::OR,DL, VT, ShiftRightLo, ShiftLeftHi);
8678SDValue HiTrue = DAG.getNode(ShiftRightOp,DL, VT,Hi, Shamt);
8679SDValue LoFalse = DAG.getNode(ShiftRightOp,DL, VT,Hi, ShamtMinusXLen);
8680SDValue HiFalse =
8681 IsSRA ? DAG.getNode(ISD::SRA,DL, VT,Hi, XLenMinus1) :Zero;
8682
8683SDValueCC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero,ISD::SETLT);
8684
8685Lo = DAG.getNode(ISD::SELECT,DL, VT,CC, LoTrue, LoFalse);
8686Hi = DAG.getNode(ISD::SELECT,DL, VT,CC, HiTrue, HiFalse);
8687
8688SDValue Parts[2] = {Lo,Hi};
8689return DAG.getMergeValues(Parts,DL);
8690}
8691
8692// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8693// legal equivalently-sized i8 type, so we can use that as a go-between.
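// For example (a sketch): a non-constant (splat_vector vXi1 x) becomes
// (setcc (splat_vector vXi8 (and x, 1)), (splat_vector vXi8 0), setne).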
8694SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValueOp,
8695SelectionDAG &DAG) const{
8696SDLocDL(Op);
8697MVT VT =Op.getSimpleValueType();
8698SDValue SplatVal =Op.getOperand(0);
8699// All-zeros or all-ones splats are handled specially.
8700if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8701SDValue VL =getDefaultScalableVLOps(VT,DL, DAG, Subtarget).second;
8702return DAG.getNode(RISCVISD::VMSET_VL,DL, VT, VL);
8703 }
8704if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8705SDValue VL =getDefaultScalableVLOps(VT,DL, DAG, Subtarget).second;
8706return DAG.getNode(RISCVISD::VMCLR_VL,DL, VT, VL);
8707 }
8708MVT InterVT = VT.changeVectorElementType(MVT::i8);
8709 SplatVal = DAG.getNode(ISD::AND,DL, SplatVal.getValueType(), SplatVal,
8710 DAG.getConstant(1,DL, SplatVal.getValueType()));
8711SDValueLHS = DAG.getSplatVector(InterVT,DL, SplatVal);
8712SDValueZero = DAG.getConstant(0,DL, InterVT);
8713return DAG.getSetCC(DL, VT, LHS, Zero,ISD::SETNE);
8714}
8715
8716// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8717// illegal (currently only vXi64 RV32).
8718// FIXME: We could also catch non-constant sign-extended i32 values and lower
8719// them to VMV_V_X_VL.
8720SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValueOp,
8721SelectionDAG &DAG) const{
8722SDLocDL(Op);
8723MVT VecVT =Op.getSimpleValueType();
8724assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8725"Unexpected SPLAT_VECTOR_PARTS lowering");
8726
8727assert(Op.getNumOperands() == 2 &&"Unexpected number of operands!");
8728SDValueLo =Op.getOperand(0);
8729SDValueHi =Op.getOperand(1);
8730
8731MVT ContainerVT = VecVT;
8732if (VecVT.isFixedLengthVector())
8733 ContainerVT =getContainerForFixedLengthVector(VecVT);
8734
8735auto VL =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget).second;
8736
8737SDValue Res =
8738splatPartsI64WithVL(DL, ContainerVT,SDValue(),Lo,Hi, VL, DAG);
8739
8740if (VecVT.isFixedLengthVector())
8741 Res =convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8742
8743return Res;
8744}
8745
8746// Custom-lower extensions from mask vectors by using a vselect either with 1
8747// for zero/any-extension or -1 for sign-extension:
8748// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8749// Note that any-extension is lowered identically to zero-extension.
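// For example (a sketch): (v4i32 (sext v4i1 m)) becomes
// (vselect m, (splat -1), (splat 0)), while zero/any-extension uses +1 for
// the true value.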
8750SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValueOp,SelectionDAG &DAG,
8751 int64_t ExtTrueVal) const{
8752SDLocDL(Op);
8753MVT VecVT =Op.getSimpleValueType();
8754SDValue Src =Op.getOperand(0);
8755// Only custom-lower extensions from mask types
8756assert(Src.getValueType().isVector() &&
8757 Src.getValueType().getVectorElementType() == MVT::i1);
8758
8759if (VecVT.isScalableVector()) {
8760SDValue SplatZero = DAG.getConstant(0,DL, VecVT);
8761SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal,DL, VecVT);
8762return DAG.getNode(ISD::VSELECT,DL, VecVT, Src, SplatTrueVal, SplatZero);
8763 }
8764
8765MVT ContainerVT =getContainerForFixedLengthVector(VecVT);
8766MVT I1ContainerVT =
8767MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8768
8769SDValueCC =convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8770
8771SDValue VL =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget).second;
8772
8773MVT XLenVT = Subtarget.getXLenVT();
8774SDValue SplatZero = DAG.getConstant(0,DL, XLenVT);
8775SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal,DL, XLenVT);
8776
8777 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
8778 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8779 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
8780 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8781SDValueSelect =
8782 DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT,CC, SplatTrueVal,
8783 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8784
8785returnconvertFromScalableVector(VecVT,Select, DAG, Subtarget);
8786}
8787
8788SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8789SDValueOp,SelectionDAG &DAG,unsigned ExtendOpc) const{
8790MVT ExtVT =Op.getSimpleValueType();
8791// Only custom-lower extensions from fixed-length vector types.
8792if (!ExtVT.isFixedLengthVector())
8793returnOp;
8794MVT VT =Op.getOperand(0).getSimpleValueType();
8795// Grab the canonical container type for the extended type. Infer the smaller
8796// type from that to ensure the same number of vector elements, as we know
8797// the LMUL will be sufficient to hold the smaller type.
8798MVT ContainerExtVT =getContainerForFixedLengthVector(ExtVT);
8799// Get the extended container type manually to ensure the same number of
8800// vector elements between source and dest.
8801MVT ContainerVT =MVT::getVectorVT(VT.getVectorElementType(),
8802 ContainerExtVT.getVectorElementCount());
8803
8804SDValue Op1 =
8805convertToScalableVector(ContainerVT,Op.getOperand(0), DAG, Subtarget);
8806
8807SDLocDL(Op);
8808auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
8809
8810SDValueExt = DAG.getNode(ExtendOpc,DL, ContainerExtVT, Op1, Mask, VL);
8811
8812returnconvertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8813}
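// Illustrative example (assuming Zvl128b for the container choice): a
// fixed-length
//   %e = sext <4 x i16> %v to <4 x i32>
// picks the canonical container for the destination (e.g. nxv2i32), derives a
// source container with the same element count (nxv2i16), performs the
// VL-predicated extend on the scalable types, and converts the result back to
// <4 x i32>.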
8814
8815// Custom-lower truncations from vectors to mask vectors by using a mask and a
8816// setcc operation:
8817// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8818SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValueOp,
8819SelectionDAG &DAG) const{
8820bool IsVPTrunc =Op.getOpcode() == ISD::VP_TRUNCATE;
8821SDLocDL(Op);
8822EVT MaskVT =Op.getValueType();
8823// Only expect to custom-lower truncations to mask types
8824assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8825"Unexpected type for vector mask lowering");
8826SDValue Src =Op.getOperand(0);
8827MVT VecVT = Src.getSimpleValueType();
8828SDValueMask, VL;
8829if (IsVPTrunc) {
8830Mask =Op.getOperand(1);
8831 VL =Op.getOperand(2);
8832 }
8833// If this is a fixed vector, we need to convert it to a scalable vector.
8834MVT ContainerVT = VecVT;
8835
8836if (VecVT.isFixedLengthVector()) {
8837 ContainerVT =getContainerForFixedLengthVector(VecVT);
8838 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8839if (IsVPTrunc) {
8840MVT MaskContainerVT =
8841getContainerForFixedLengthVector(Mask.getSimpleValueType());
8842Mask =convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8843 }
8844 }
8845
8846if (!IsVPTrunc) {
8847 std::tie(Mask, VL) =
8848getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
8849 }
8850
8851SDValue SplatOne = DAG.getConstant(1,DL, Subtarget.getXLenVT());
8852SDValue SplatZero = DAG.getConstant(0,DL, Subtarget.getXLenVT());
8853
8854 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
8855 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8856 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
8857 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8858
8859MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8860SDValue Trunc = DAG.getNode(RISCVISD::AND_VL,DL, ContainerVT, Src, SplatOne,
8861 DAG.getUNDEF(ContainerVT), Mask, VL);
8862 Trunc = DAG.getNode(RISCVISD::SETCC_VL,DL, MaskContainerVT,
8863 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8864 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8865if (MaskVT.isFixedLengthVector())
8866 Trunc =convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8867return Trunc;
8868}
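// Worked example (illustrative): truncating to a mask
//   %m = trunc <vscale x 4 x i8> %v to <vscale x 4 x i1>
// follows the recipe in the comment above:
//   %t = and  %v, splat(1)          ; AND_VL with a splatted 1
//   %m = setcc ne %t, splat(0)      ; SETCC_VL against a splatted 0
// so only bit 0 of each source element determines the resulting mask bit.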
8869
8870SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValueOp,
8871SelectionDAG &DAG) const{
8872unsigned Opc =Op.getOpcode();
8873bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8874SDLocDL(Op);
8875
8876MVT VT =Op.getSimpleValueType();
8877// Only custom-lower vector truncates
8878assert(VT.isVector() &&"Unexpected type for vector truncate lowering");
8879
8880// Truncates to mask types are handled differently
8881if (VT.getVectorElementType() == MVT::i1)
8882return lowerVectorMaskTruncLike(Op, DAG);
8883
8884// RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8885// truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8886// truncate by one power of two at a time.
8887MVT DstEltVT = VT.getVectorElementType();
8888
8889SDValue Src =Op.getOperand(0);
8890MVT SrcVT = Src.getSimpleValueType();
8891MVT SrcEltVT = SrcVT.getVectorElementType();
8892
8893assert(DstEltVT.bitsLT(SrcEltVT) &&isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8894isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8895"Unexpected vector truncate lowering");
8896
8897MVT ContainerVT = SrcVT;
8898SDValueMask, VL;
8899if (IsVPTrunc) {
8900Mask =Op.getOperand(1);
8901 VL =Op.getOperand(2);
8902 }
8903if (SrcVT.isFixedLengthVector()) {
8904 ContainerVT =getContainerForFixedLengthVector(SrcVT);
8905 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8906if (IsVPTrunc) {
8907MVT MaskVT =getMaskTypeFor(ContainerVT);
8908Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8909 }
8910 }
8911
8912SDValueResult = Src;
8913if (!IsVPTrunc) {
8914 std::tie(Mask, VL) =
8915getDefaultVLOps(SrcVT, ContainerVT,DL, DAG, Subtarget);
8916 }
8917
8918unsigned NewOpc;
8919if (Opc ==ISD::TRUNCATE_SSAT_S)
8920 NewOpc =RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8921elseif (Opc ==ISD::TRUNCATE_USAT_U)
8922 NewOpc =RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8923else
8924 NewOpc =RISCVISD::TRUNCATE_VECTOR_VL;
8925
8926do {
8927 SrcEltVT =MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8928MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8929Result = DAG.getNode(NewOpc,DL, ResultVT, Result, Mask, VL);
8930 }while (SrcEltVT != DstEltVT);
8931
8932if (SrcVT.isFixedLengthVector())
8933Result =convertFromScalableVector(VT, Result, DAG, Subtarget);
8934
8935returnResult;
8936}
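// Illustrative walk-through (plain ISD::TRUNCATE assumed, no saturation):
// since RVV narrowing only halves SEW, truncating i64 elements to i8 emits a
// chain of TRUNCATE_VECTOR_VL nodes, conceptually
//   vXi64 -> vXi32 -> vXi16 -> vXi8
// one SEW-halving step per iteration of the loop above.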
8937
8938SDValue
8939RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValueOp,
8940SelectionDAG &DAG) const{
8941SDLocDL(Op);
8942SDValue Chain =Op.getOperand(0);
8943SDValue Src =Op.getOperand(1);
8944MVT VT =Op.getSimpleValueType();
8945MVT SrcVT = Src.getSimpleValueType();
8946MVT ContainerVT = VT;
8947if (VT.isFixedLengthVector()) {
8948MVT SrcContainerVT =getContainerForFixedLengthVector(SrcVT);
8949 ContainerVT =
8950 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8951 Src =convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8952 }
8953
8954auto [Mask, VL] =getDefaultVLOps(SrcVT, ContainerVT,DL, DAG, Subtarget);
8955
8956// RVV can only widen/truncate fp to types double/half the size of the source.
8957if ((VT.getVectorElementType() == MVT::f64 &&
8958 (SrcVT.getVectorElementType() == MVT::f16 ||
8959 SrcVT.getVectorElementType() == MVT::bf16)) ||
8960 ((VT.getVectorElementType() == MVT::f16 ||
8961 VT.getVectorElementType() == MVT::bf16) &&
8962 SrcVT.getVectorElementType() == MVT::f64)) {
8963// For double rounding, the intermediate rounding should be round-to-odd.
8964unsigned InterConvOpc =Op.getOpcode() ==ISD::STRICT_FP_EXTEND
8965 ?RISCVISD::STRICT_FP_EXTEND_VL
8966 :RISCVISD::STRICT_VFNCVT_ROD_VL;
8967MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8968 Src = DAG.getNode(InterConvOpc,DL, DAG.getVTList(InterVT, MVT::Other),
8969 Chain, Src, Mask, VL);
8970 Chain = Src.getValue(1);
8971 }
8972
8973unsigned ConvOpc =Op.getOpcode() ==ISD::STRICT_FP_EXTEND
8974 ?RISCVISD::STRICT_FP_EXTEND_VL
8975 :RISCVISD::STRICT_FP_ROUND_VL;
8976SDValue Res = DAG.getNode(ConvOpc,DL, DAG.getVTList(ContainerVT, MVT::Other),
8977 Chain, Src, Mask, VL);
8978if (VT.isFixedLengthVector()) {
8979// StrictFP operations have two result values. Their lowered result should
8980// have the same result count.
8981SDValue SubVec =convertFromScalableVector(VT, Res, DAG, Subtarget);
8982 Res = DAG.getMergeValues({SubVec, Res.getValue(1)},DL);
8983 }
8984return Res;
8985}
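// Illustrative sequence (a sketch): a strict vXf64 -> vXf16 round becomes,
// conceptually,
//   %mid = strict_vfncvt_rod %src    ; f64 -> f32, round-to-odd
//   %res = strict_fp_round   %mid    ; f32 -> f16, dynamic rounding mode
// Using round-to-odd for the intermediate step keeps the double rounding from
// perturbing the correctly rounded f16 result; strict f16 -> f64 extends
// likewise go through f32 with two widening steps.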
8986
8987SDValue
8988RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValueOp,
8989SelectionDAG &DAG) const{
8990bool IsVP =
8991Op.getOpcode() == ISD::VP_FP_ROUND ||Op.getOpcode() == ISD::VP_FP_EXTEND;
8992bool IsExtend =
8993Op.getOpcode() == ISD::VP_FP_EXTEND ||Op.getOpcode() ==ISD::FP_EXTEND;
8994// RVV can only truncate fp to types half the size of the source, so we
8995// custom-lower f64->f16 rounds via RVV's round-to-odd float
8996// conversion instruction.
8997SDLocDL(Op);
8998MVT VT =Op.getSimpleValueType();
8999
9000assert(VT.isVector() &&"Unexpected type for vector truncate lowering");
9001
9002SDValue Src =Op.getOperand(0);
9003MVT SrcVT = Src.getSimpleValueType();
9004
9005bool IsDirectExtend =
9006 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9007 (SrcVT.getVectorElementType() != MVT::f16 &&
9008 SrcVT.getVectorElementType() != MVT::bf16));
9009bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9010 VT.getVectorElementType() != MVT::bf16) ||
9011 SrcVT.getVectorElementType() != MVT::f64);
9012
9013bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9014
9015// Prepare any fixed-length vector operands.
9016MVT ContainerVT = VT;
9017SDValueMask, VL;
9018if (IsVP) {
9019Mask =Op.getOperand(1);
9020 VL =Op.getOperand(2);
9021 }
9022if (VT.isFixedLengthVector()) {
9023MVT SrcContainerVT =getContainerForFixedLengthVector(SrcVT);
9024 ContainerVT =
9025 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9026 Src =convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9027if (IsVP) {
9028MVT MaskVT =getMaskTypeFor(ContainerVT);
9029Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9030 }
9031 }
9032
9033if (!IsVP)
9034 std::tie(Mask, VL) =
9035getDefaultVLOps(SrcVT, ContainerVT,DL, DAG, Subtarget);
9036
9037unsigned ConvOpc = IsExtend ?RISCVISD::FP_EXTEND_VL :RISCVISD::FP_ROUND_VL;
9038
9039if (IsDirectConv) {
9040 Src = DAG.getNode(ConvOpc,DL, ContainerVT, Src, Mask, VL);
9041if (VT.isFixedLengthVector())
9042 Src =convertFromScalableVector(VT, Src, DAG, Subtarget);
9043return Src;
9044 }
9045
9046unsigned InterConvOpc =
9047 IsExtend ?RISCVISD::FP_EXTEND_VL :RISCVISD::VFNCVT_ROD_VL;
9048
9049MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9050SDValue IntermediateConv =
9051 DAG.getNode(InterConvOpc,DL, InterVT, Src, Mask, VL);
9052SDValueResult =
9053 DAG.getNode(ConvOpc,DL, ContainerVT, IntermediateConv, Mask, VL);
9054if (VT.isFixedLengthVector())
9055returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
9056returnResult;
9057}
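// Illustrative sketch: a (vp.)fpext from vXf16 to vXf64 is not a direct
// conversion (SEW would have to quadruple), so it is split into two widening
// steps through f32:
//   %mid = fp_extend_vl %src, %mask, %vl   ; f16 -> f32
//   %res = fp_extend_vl %mid, %mask, %vl   ; f32 -> f64
// Single doubling/halving conversions take the direct path above instead.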
9058
9059// Given a scalable vector type and an index into it, returns the type for the
9060// smallest subvector that the index fits in. This can be used to reduce LMUL
9061// for operations like vslidedown.
9062//
9063// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9064static std::optional<MVT>
9065getSmallestVTForIndex(MVT VecVT,unsigned MaxIdx,SDLocDL,SelectionDAG &DAG,
9066constRISCVSubtarget &Subtarget) {
9067assert(VecVT.isScalableVector());
9068constunsigned EltSize = VecVT.getScalarSizeInBits();
9069constunsigned VectorBitsMin = Subtarget.getRealMinVLen();
9070constunsigned MinVLMAX = VectorBitsMin / EltSize;
9071MVT SmallerVT;
9072if (MaxIdx < MinVLMAX)
9073 SmallerVT =getLMUL1VT(VecVT);
9074elseif (MaxIdx < MinVLMAX * 2)
9075 SmallerVT =getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9076elseif (MaxIdx < MinVLMAX * 4)
9077 SmallerVT =getLMUL1VT(VecVT)
9078 .getDoubleNumVectorElementsVT()
9079 .getDoubleNumVectorElementsVT();
9080if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9081return std::nullopt;
9082return SmallerVT;
9083}
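// Worked example (assuming Zvl128b, i.e. guaranteed VLEN >= 128): for
// VecVT = nxv16i32 (LMUL=8) an m1 register is nxv2i32 and is known to hold at
// least MinVLMAX = 128/32 = 4 elements, so
//   MaxIdx < 4  -> nxv2i32 (m1)
//   MaxIdx < 8  -> nxv4i32 (m2)
//   MaxIdx < 16 -> nxv8i32 (m4)
// and anything larger returns std::nullopt (no profitable shrink is known).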
9084
9085// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9086// first position of a vector, and that vector is slid up to the insert index.
9087// By limiting the active vector length to index+1 and merging with the
9088// original vector (with an undisturbed tail policy for elements >= VL), we
9089// achieve the desired result of leaving all elements untouched except the one
9090// at VL-1, which is replaced with the desired value.
9091SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValueOp,
9092SelectionDAG &DAG) const{
9093SDLocDL(Op);
9094MVT VecVT =Op.getSimpleValueType();
9095MVT XLenVT = Subtarget.getXLenVT();
9096SDValue Vec =Op.getOperand(0);
9097SDValue Val =Op.getOperand(1);
9098MVT ValVT = Val.getSimpleValueType();
9099SDValueIdx =Op.getOperand(2);
9100
9101if (VecVT.getVectorElementType() == MVT::i1) {
9102// FIXME: For now we just promote to an i8 vector and insert into that,
9103// but this is probably not optimal.
9104MVT WideVT =MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9105 Vec = DAG.getNode(ISD::ZERO_EXTEND,DL, WideVT, Vec);
9106 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, WideVT, Vec, Val,Idx);
9107return DAG.getNode(ISD::TRUNCATE,DL, VecVT, Vec);
9108 }
9109
9110if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9111 ValVT == MVT::bf16) {
9112// If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9113MVT IntVT = VecVT.changeTypeToInteger();
9114SDValue IntInsert = DAG.getNode(
9115ISD::INSERT_VECTOR_ELT,DL, IntVT, DAG.getBitcast(IntVT, Vec),
9116 DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT, Val),Idx);
9117return DAG.getBitcast(VecVT, IntInsert);
9118 }
9119
9120MVT ContainerVT = VecVT;
9121// If the operand is a fixed-length vector, convert to a scalable one.
9122if (VecVT.isFixedLengthVector()) {
9123 ContainerVT =getContainerForFixedLengthVector(VecVT);
9124 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9125 }
9126
9127// If we know the index we're going to insert at, we can shrink Vec so that
9128// we're performing the scalar inserts and slideup on a smaller LMUL.
9129MVT OrigContainerVT = ContainerVT;
9130SDValue OrigVec = Vec;
9131SDValue AlignedIdx;
9132if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9133constunsigned OrigIdx = IdxC->getZExtValue();
9134// Do we know an upper bound on LMUL?
9135if (auto ShrunkVT =getSmallestVTForIndex(ContainerVT, OrigIdx,
9136DL, DAG, Subtarget)) {
9137 ContainerVT = *ShrunkVT;
9138 AlignedIdx = DAG.getVectorIdxConstant(0,DL);
9139 }
9140
9141// If we're compiling for an exact VLEN value, we can always perform
9142// the insert in m1 as we can determine the register corresponding to
9143// the index in the register group.
9144constMVT M1VT =getLMUL1VT(ContainerVT);
9145if (auto VLEN = Subtarget.getRealVLen();
9146 VLEN && ContainerVT.bitsGT(M1VT)) {
9147EVT ElemVT = VecVT.getVectorElementType();
9148unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9149unsigned RemIdx = OrigIdx % ElemsPerVReg;
9150unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9151unsigned ExtractIdx =
9152 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9153 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx,DL);
9154Idx = DAG.getVectorIdxConstant(RemIdx,DL);
9155 ContainerVT = M1VT;
9156 }
9157
9158if (AlignedIdx)
9159 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ContainerVT, Vec,
9160 AlignedIdx);
9161 }
9162
9163bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9164// Even i64-element vectors on RV32 can be lowered without scalar
9165// legalization if the most-significant 32 bits of the value are not affected
9166// by the sign-extension of the lower 32 bits.
9167// TODO: We could also catch sign extensions of a 32-bit value.
9168if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9169constauto *CVal = cast<ConstantSDNode>(Val);
9170if (isInt<32>(CVal->getSExtValue())) {
9171 IsLegalInsert =true;
9172 Val = DAG.getSignedConstant(CVal->getSExtValue(),DL, MVT::i32);
9173 }
9174 }
9175
9176auto [Mask, VL] =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
9177
9178SDValue ValInVec;
9179
9180if (IsLegalInsert) {
9181unsigned Opc =
9182 VecVT.isFloatingPoint() ?RISCVISD::VFMV_S_F_VL :RISCVISD::VMV_S_X_VL;
9183if (isNullConstant(Idx)) {
9184if (!VecVT.isFloatingPoint())
9185 Val = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT, Val);
9186 Vec = DAG.getNode(Opc,DL, ContainerVT, Vec, Val, VL);
9187
9188if (AlignedIdx)
9189 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, OrigContainerVT, OrigVec,
9190 Vec, AlignedIdx);
9191if (!VecVT.isFixedLengthVector())
9192return Vec;
9193returnconvertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9194 }
9195 ValInVec =lowerScalarInsert(Val, VL, ContainerVT,DL, DAG, Subtarget);
9196 }else {
9197// On RV32, i64-element vectors must be specially handled to place the
9198// value at element 0, by using two vslide1down instructions in sequence on
9199// the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9200// this.
9201SDValue ValLo, ValHi;
9202 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val,DL, MVT::i32, MVT::i32);
9203MVT I32ContainerVT =
9204MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9205SDValue I32Mask =
9206getDefaultScalableVLOps(I32ContainerVT,DL, DAG, Subtarget).first;
9207// Limit the active VL to two.
9208SDValue InsertI64VL = DAG.getConstant(2,DL, XLenVT);
9209// If the Idx is 0 we can insert directly into the vector.
9210if (isNullConstant(Idx)) {
9211// First slide in the lo value, then the hi value above it. We use slide1down
9212// to avoid the register group overlap constraint of vslide1up.
9213 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL,DL, I32ContainerVT,
9214 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9215// If the source vector is undef don't pass along the tail elements from
9216// the previous slide1down.
9217SDValueTail = Vec.isUndef() ? Vec : ValInVec;
9218 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL,DL, I32ContainerVT,
9219Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9220// Bitcast back to the right container type.
9221 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9222
9223if (AlignedIdx)
9224 ValInVec =
9225 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, OrigContainerVT, OrigVec,
9226 ValInVec, AlignedIdx);
9227if (!VecVT.isFixedLengthVector())
9228return ValInVec;
9229returnconvertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9230 }
9231
9232// First slide in the lo value, then the hi value above it. We use slide1down
9233// to avoid the register group overlap constraint of vslide1up.
9234 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL,DL, I32ContainerVT,
9235 DAG.getUNDEF(I32ContainerVT),
9236 DAG.getUNDEF(I32ContainerVT), ValLo,
9237 I32Mask, InsertI64VL);
9238 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL,DL, I32ContainerVT,
9239 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9240 I32Mask, InsertI64VL);
9241// Bitcast back to the right container type.
9242 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9243 }
9244
9245// Now that the value is in a vector, slide it into position.
9246SDValue InsertVL =
9247 DAG.getNode(ISD::ADD,DL, XLenVT,Idx, DAG.getConstant(1,DL, XLenVT));
9248
9249// Use tail agnostic policy if Idx is the last index of Vec.
9250unsigned Policy =RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9251if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9252Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9253 Policy =RISCVII::TAIL_AGNOSTIC;
9254SDValue Slideup =getVSlideup(DAG, Subtarget,DL, ContainerVT, Vec, ValInVec,
9255Idx, Mask, InsertVL, Policy);
9256
9257if (AlignedIdx)
9258 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, OrigContainerVT, OrigVec,
9259 Slideup, AlignedIdx);
9260if (!VecVT.isFixedLengthVector())
9261return Slideup;
9262returnconvertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9263}
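// Illustrative example (a sketch of the common integer path): inserting at a
// known, non-zero index, e.g.
//   %r = insertelement <8 x i32> %v, i32 %x, i64 5
// first moves %x into element 0 of a vector (VMV_S_X_VL / VFMV_S_F_VL), then
// slides that element up into place with VL limited to idx+1 = 6 and a
// tail-undisturbed policy, so every element of %v other than index 5 is left
// untouched.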
9264
9265// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9266// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9267// types this is done using VMV_X_S to allow us to glean information about the
9268// sign bits of the result.
9269SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValueOp,
9270SelectionDAG &DAG) const{
9271SDLocDL(Op);
9272SDValueIdx =Op.getOperand(1);
9273SDValue Vec =Op.getOperand(0);
9274EVT EltVT =Op.getValueType();
9275MVT VecVT = Vec.getSimpleValueType();
9276MVT XLenVT = Subtarget.getXLenVT();
9277
9278if (VecVT.getVectorElementType() == MVT::i1) {
9279// Use vfirst.m to extract the first bit.
9280if (isNullConstant(Idx)) {
9281MVT ContainerVT = VecVT;
9282if (VecVT.isFixedLengthVector()) {
9283 ContainerVT =getContainerForFixedLengthVector(VecVT);
9284 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9285 }
9286auto [Mask, VL] =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
9287SDValue Vfirst =
9288 DAG.getNode(RISCVISD::VFIRST_VL,DL, XLenVT, Vec, Mask, VL);
9289SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9290 DAG.getConstant(0,DL, XLenVT),ISD::SETEQ);
9291return DAG.getNode(ISD::TRUNCATE,DL, EltVT, Res);
9292 }
9293if (VecVT.isFixedLengthVector()) {
9294unsigned NumElts = VecVT.getVectorNumElements();
9295if (NumElts >= 8) {
9296MVT WideEltVT;
9297unsigned WidenVecLen;
9298SDValue ExtractElementIdx;
9299SDValue ExtractBitIdx;
9300unsigned MaxEEW = Subtarget.getELen();
9301MVT LargestEltVT =MVT::getIntegerVT(
9302 std::min(MaxEEW,unsigned(XLenVT.getSizeInBits())));
9303if (NumElts <= LargestEltVT.getSizeInBits()) {
9304assert(isPowerOf2_32(NumElts) &&
9305"the number of elements should be power of 2");
9306 WideEltVT =MVT::getIntegerVT(NumElts);
9307 WidenVecLen = 1;
9308 ExtractElementIdx = DAG.getConstant(0,DL, XLenVT);
9309 ExtractBitIdx =Idx;
9310 }else {
9311 WideEltVT = LargestEltVT;
9312 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9313// extract element index = index / element width
9314 ExtractElementIdx = DAG.getNode(
9315ISD::SRL,DL, XLenVT,Idx,
9316 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()),DL, XLenVT));
9317// mask bit index = index % element width
9318 ExtractBitIdx = DAG.getNode(
9319ISD::AND,DL, XLenVT,Idx,
9320 DAG.getConstant(WideEltVT.getSizeInBits() - 1,DL, XLenVT));
9321 }
9322MVT WideVT =MVT::getVectorVT(WideEltVT, WidenVecLen);
9323 Vec = DAG.getNode(ISD::BITCAST,DL, WideVT, Vec);
9324SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, XLenVT,
9325 Vec, ExtractElementIdx);
9326// Extract the bit from GPR.
9327SDValue ShiftRight =
9328 DAG.getNode(ISD::SRL,DL, XLenVT, ExtractElt, ExtractBitIdx);
9329SDValue Res = DAG.getNode(ISD::AND,DL, XLenVT, ShiftRight,
9330 DAG.getConstant(1,DL, XLenVT));
9331return DAG.getNode(ISD::TRUNCATE,DL, EltVT, Res);
9332 }
9333 }
9334// Otherwise, promote to an i8 vector and extract from that.
9335MVT WideVT =MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9336 Vec = DAG.getNode(ISD::ZERO_EXTEND,DL, WideVT, Vec);
9337return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, EltVT, Vec,Idx);
9338 }
9339
9340if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9341 EltVT == MVT::bf16) {
9342// If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9343MVT IntVT = VecVT.changeTypeToInteger();
9344SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9345SDValue IntExtract =
9346 DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, XLenVT, IntVec,Idx);
9347return DAG.getNode(RISCVISD::FMV_H_X,DL, EltVT, IntExtract);
9348 }
9349
9350// If this is a fixed vector, we need to convert it to a scalable vector.
9351MVT ContainerVT = VecVT;
9352if (VecVT.isFixedLengthVector()) {
9353 ContainerVT =getContainerForFixedLengthVector(VecVT);
9354 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9355 }
9356
9357// If we're compiling for an exact VLEN value and we have a known
9358// constant index, we can always perform the extract in m1 (or
9359// smaller) as we can determine the register corresponding to
9360// the index in the register group.
9361constauto VLen = Subtarget.getRealVLen();
9362if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9363 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9364MVT M1VT =getLMUL1VT(ContainerVT);
9365unsigned OrigIdx = IdxC->getZExtValue();
9366EVT ElemVT = VecVT.getVectorElementType();
9367unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9368unsigned RemIdx = OrigIdx % ElemsPerVReg;
9369unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9370unsigned ExtractIdx =
9371 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9372 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, M1VT, Vec,
9373 DAG.getVectorIdxConstant(ExtractIdx,DL));
9374Idx = DAG.getVectorIdxConstant(RemIdx,DL);
9375 ContainerVT = M1VT;
9376 }
9377
9378// Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9379// contains our index.
9380 std::optional<uint64_t> MaxIdx;
9381if (VecVT.isFixedLengthVector())
9382 MaxIdx = VecVT.getVectorNumElements() - 1;
9383if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9384 MaxIdx = IdxC->getZExtValue();
9385if (MaxIdx) {
9386if (auto SmallerVT =
9387getSmallestVTForIndex(ContainerVT, *MaxIdx,DL, DAG, Subtarget)) {
9388 ContainerVT = *SmallerVT;
9389 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ContainerVT, Vec,
9390 DAG.getConstant(0,DL, XLenVT));
9391 }
9392 }
9393
9394// If after narrowing, the required slide is still greater than LMUL2,
9395// fall back to generic expansion and go through the stack. This is done
9396// for a subtle reason: extracting *all* elements out of a vector is
9397// widely expected to be linear in vector size, but because vslidedown
9398// is linear in LMUL, performing N extracts using vslidedown becomes
9399// O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9400// seems to have the same problem (the store is linear in LMUL), but the
9401// generic expansion *memoizes* the store, and thus for many extracts of
9402// the same vector we end up with one store and a bunch of loads.
9403// TODO: We don't have the same code for insert_vector_elt because we
9404// have BUILD_VECTOR and handle the degenerate case there. Should we
9405// consider adding an inverse BUILD_VECTOR node?
9406MVT LMUL2VT =getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9407if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9408returnSDValue();
9409
9410// If the index is 0, the vector is already in the right position.
9411if (!isNullConstant(Idx)) {
9412// Use a VL of 1 to avoid processing more elements than we need.
9413auto [Mask, VL] =getDefaultVLOps(1, ContainerVT,DL, DAG, Subtarget);
9414 Vec =getVSlidedown(DAG, Subtarget,DL, ContainerVT,
9415 DAG.getUNDEF(ContainerVT), Vec,Idx, Mask, VL);
9416 }
9417
9418if (!EltVT.isInteger()) {
9419// Floating-point extracts are handled in TableGen.
9420return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, EltVT, Vec,
9421 DAG.getVectorIdxConstant(0,DL));
9422 }
9423
9424SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S,DL, XLenVT, Vec);
9425return DAG.getNode(ISD::TRUNCATE,DL, EltVT, Elt0);
9426}
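// Illustrative example: an integer extract at a non-zero index, e.g.
//   %x = extractelement <vscale x 4 x i32> %v, i64 %i
// is lowered roughly as
//   %s = vslidedown(%v, %i)   ; VL of 1, possibly on a narrowed container
//   %x = trunc(vmv_x_s %s)    ; RISCVISD::VMV_X_S then truncate to EltVT
// while floating-point extracts keep an EXTRACT_VECTOR_ELT at index 0 that is
// matched later in TableGen.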
9427
9428// Some RVV intrinsics may claim that they want an integer operand to be
9429// promoted or expanded.
9430staticSDValuelowerVectorIntrinsicScalars(SDValueOp,SelectionDAG &DAG,
9431constRISCVSubtarget &Subtarget) {
9432assert((Op.getOpcode() ==ISD::INTRINSIC_VOID ||
9433Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
9434Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN) &&
9435"Unexpected opcode");
9436
9437if (!Subtarget.hasVInstructions())
9438returnSDValue();
9439
9440bool HasChain =Op.getOpcode() ==ISD::INTRINSIC_VOID ||
9441Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN;
9442unsigned IntNo =Op.getConstantOperandVal(HasChain ? 1 : 0);
9443
9444SDLocDL(Op);
9445
9446constRISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9447 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9448if (!II || !II->hasScalarOperand())
9449returnSDValue();
9450
9451unsigned SplatOp =II->ScalarOperand + 1 + HasChain;
9452assert(SplatOp <Op.getNumOperands());
9453
9454SmallVector<SDValue, 8>Operands(Op->ops());
9455SDValue &ScalarOp =Operands[SplatOp];
9456MVT OpVT = ScalarOp.getSimpleValueType();
9457MVT XLenVT = Subtarget.getXLenVT();
9458
9459// If this isn't a scalar, or its type is XLenVT we're done.
9460if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9461returnSDValue();
9462
9463// Simplest case is that the operand needs to be promoted to XLenVT.
9464if (OpVT.bitsLT(XLenVT)) {
9465// If the operand is a constant, sign extend to increase our chances
9466// of being able to use a .vi instruction. ANY_EXTEND would become a
9467// a zero extend and the simm5 check in isel would fail.
9468// FIXME: Should we ignore the upper bits in isel instead?
9469unsigned ExtOpc =
9470 isa<ConstantSDNode>(ScalarOp) ?ISD::SIGN_EXTEND :ISD::ANY_EXTEND;
9471 ScalarOp = DAG.getNode(ExtOpc,DL, XLenVT, ScalarOp);
9472return DAG.getNode(Op->getOpcode(),DL,Op->getVTList(),Operands);
9473 }
9474
9475// Use the previous operand to get the vXi64 VT. The result might be a mask
9476// VT for compares. Using the previous operand assumes that the previous
9477// operand will never have a smaller element size than a scalar operand and
9478// that a widening operation never uses SEW=64.
9479// NOTE: If this fails the below assert, we can probably just find the
9480// element count from any operand or result and use it to construct the VT.
9481assert(II->ScalarOperand > 0 &&"Unexpected splat operand!");
9482MVT VT =Op.getOperand(SplatOp - 1).getSimpleValueType();
9483
9484// The more complex case is when the scalar is larger than XLenVT.
9485assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9486 VT.getVectorElementType() == MVT::i64 &&"Unexpected VTs!");
9487
9488// If this is a sign-extended 32-bit value, we can truncate it and rely on the
9489// instruction to sign-extend since SEW>XLEN.
9490if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9491 ScalarOp = DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, ScalarOp);
9492return DAG.getNode(Op->getOpcode(),DL,Op->getVTList(),Operands);
9493 }
9494
9495switch (IntNo) {
9496case Intrinsic::riscv_vslide1up:
9497case Intrinsic::riscv_vslide1down:
9498case Intrinsic::riscv_vslide1up_mask:
9499case Intrinsic::riscv_vslide1down_mask: {
9500// We need to special case these when the scalar is larger than XLen.
9501unsigned NumOps =Op.getNumOperands();
9502bool IsMasked = NumOps == 7;
9503
9504// Convert the vector source to the equivalent nxvXi32 vector.
9505MVT I32VT =MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9506SDValue Vec = DAG.getBitcast(I32VT,Operands[2]);
9507SDValue ScalarLo, ScalarHi;
9508 std::tie(ScalarLo, ScalarHi) =
9509 DAG.SplitScalar(ScalarOp,DL, MVT::i32, MVT::i32);
9510
9511// Double the VL since we halved SEW.
9512SDValue AVL =getVLOperand(Op);
9513SDValue I32VL;
9514
9515// Optimize for constant AVL
9516if (isa<ConstantSDNode>(AVL)) {
9517constauto [MinVLMAX, MaxVLMAX] =
9518RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9519
9520uint64_t AVLInt = AVL->getAsZExtVal();
9521if (AVLInt <= MinVLMAX) {
9522 I32VL = DAG.getConstant(2 * AVLInt,DL, XLenVT);
9523 }elseif (AVLInt >= 2 * MaxVLMAX) {
9524// Just set vl to VLMAX in this situation
9525 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9526 }else {
9527// For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9528// depends on the hardware implementation,
9529// so let the following code handle it.
9530 }
9531 }
9532if (!I32VL) {
9533RISCVII::VLMUL Lmul =RISCVTargetLowering::getLMUL(VT);
9534SDValue LMUL = DAG.getConstant(Lmul,DL, XLenVT);
9535unsigned Sew =RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9536SDValue SEW = DAG.getConstant(Sew,DL, XLenVT);
9537SDValue SETVL =
9538 DAG.getTargetConstant(Intrinsic::riscv_vsetvli,DL, MVT::i32);
9539// Use a vsetvli instruction to get the actual length in use, which depends
9540// on the hardware implementation.
9541SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN,DL, XLenVT, SETVL, AVL,
9542 SEW, LMUL);
9543 I32VL =
9544 DAG.getNode(ISD::SHL,DL, XLenVT, VL, DAG.getConstant(1,DL, XLenVT));
9545 }
9546
9547SDValue I32Mask =getAllOnesMask(I32VT, I32VL,DL, DAG);
9548
9549// Shift the two scalar parts in using SEW=32 slide1up/slide1down
9550// instructions.
9551SDValue Passthru;
9552if (IsMasked)
9553 Passthru = DAG.getUNDEF(I32VT);
9554else
9555 Passthru = DAG.getBitcast(I32VT,Operands[1]);
9556
9557if (IntNo == Intrinsic::riscv_vslide1up ||
9558 IntNo == Intrinsic::riscv_vslide1up_mask) {
9559 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL,DL, I32VT, Passthru, Vec,
9560 ScalarHi, I32Mask, I32VL);
9561 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL,DL, I32VT, Passthru, Vec,
9562 ScalarLo, I32Mask, I32VL);
9563 }else {
9564 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL,DL, I32VT, Passthru, Vec,
9565 ScalarLo, I32Mask, I32VL);
9566 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL,DL, I32VT, Passthru, Vec,
9567 ScalarHi, I32Mask, I32VL);
9568 }
9569
9570// Convert back to nxvXi64.
9571 Vec = DAG.getBitcast(VT, Vec);
9572
9573if (!IsMasked)
9574return Vec;
9575// Apply mask after the operation.
9576SDValue Mask =Operands[NumOps - 3];
9577SDValue MaskedOff =Operands[1];
9578// Assume Policy operand is the last operand.
9579uint64_t Policy =Operands[NumOps - 1]->getAsZExtVal();
9580// We don't need to select maskedoff if it's undef.
9581if (MaskedOff.isUndef())
9582return Vec;
9583// TAMU
9584if (Policy ==RISCVII::TAIL_AGNOSTIC)
9585return DAG.getNode(RISCVISD::VMERGE_VL,DL, VT, Mask, Vec, MaskedOff,
9586 DAG.getUNDEF(VT), AVL);
9587// TUMA or TUMU: Currently we always emit a tumu policy regardless of tuma.
9588// This is fine because vmerge does not care about the mask policy.
9589return DAG.getNode(RISCVISD::VMERGE_VL,DL, VT, Mask, Vec, MaskedOff,
9590 MaskedOff, AVL);
9591 }
9592 }
9593
9594// We need to convert the scalar to a splat vector.
9595SDValue VL =getVLOperand(Op);
9596assert(VL.getValueType() == XLenVT);
9597 ScalarOp =splatSplitI64WithVL(DL, VT,SDValue(), ScalarOp, VL, DAG);
9598return DAG.getNode(Op->getOpcode(),DL,Op->getVTList(),Operands);
9599}
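// Illustrative note for the RV32 + SEW=64 case above (hypothetical .vx-style
// intrinsic, e.g. a vadd.vx whose scalar operand is i64): the scalar cannot
// fit in a single GPR, so it is either truncated when it is provably
// sign-extended from 32 bits (the instruction re-extends it since SEW > XLEN),
// or split into lo/hi halves and materialized as a splat vector first
// (splatSplitI64WithVL), effectively turning the .vx form into a .vv form.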
9600
9601// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9602// scalable vector llvm.get.vector.length for now.
9603//
9604// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9605// (vscale * VF). The vscale and VF are independent of element width. We use
9606// SEW=8 for the vsetvli because it is the only element width that supports all
9607// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9608// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9609// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9610// SEW and LMUL are better for the surrounding vector instructions.
9611staticSDValuelowerGetVectorLength(SDNode *N,SelectionDAG &DAG,
9612constRISCVSubtarget &Subtarget) {
9613MVT XLenVT = Subtarget.getXLenVT();
9614
9615// The smallest LMUL is only valid for the smallest element width.
9616constunsigned ElementWidth = 8;
9617
9618// Determine the VF that corresponds to LMUL 1 for ElementWidth.
9619unsigned LMul1VF =RISCV::RVVBitsPerBlock / ElementWidth;
9620// We don't support VF==1 with ELEN==32.
9621 [[maybe_unused]]unsigned MinVF =
9622RISCV::RVVBitsPerBlock / Subtarget.getELen();
9623
9624 [[maybe_unused]]unsigned VF =N->getConstantOperandVal(2);
9625assert(VF >= MinVF && VF <= (LMul1VF * 8) &&isPowerOf2_32(VF) &&
9626"Unexpected VF");
9627
9628bool Fractional = VF < LMul1VF;
9629unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9630unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9631unsigned VSEW =RISCVVType::encodeSEW(ElementWidth);
9632
9633SDLocDL(N);
9634
9635SDValue LMul = DAG.getTargetConstant(VLMUL,DL, XLenVT);
9636SDValue Sew = DAG.getTargetConstant(VSEW,DL, XLenVT);
9637
9638SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND,DL, XLenVT,N->getOperand(1));
9639
9640SDValueID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli,DL, XLenVT);
9641SDValue Res =
9642 DAG.getNode(ISD::INTRINSIC_WO_CHAIN,DL, XLenVT,ID, AVL, Sew, LMul);
9643return DAG.getNode(ISD::TRUNCATE,DL,N->getValueType(0), Res);
9644}
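// Worked example (illustrative): llvm.experimental.get.vector.length with
// VF = 4 gives LMul1VF = 64/8 = 8, so VF is fractional with LMUL = mf2, and
// the node becomes roughly
//   vsetvli x, avl, e8, mf2    ; VLMax = (VLEN/64) * 4 = vscale * 4
// followed by a truncate of the vsetvli result back to the original type.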
9645
9646staticSDValuelowerCttzElts(SDNode *N,SelectionDAG &DAG,
9647constRISCVSubtarget &Subtarget) {
9648SDValue Op0 =N->getOperand(1);
9649MVT OpVT = Op0.getSimpleValueType();
9650MVT ContainerVT = OpVT;
9651if (OpVT.isFixedLengthVector()) {
9652 ContainerVT =getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9653 Op0 =convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9654 }
9655MVT XLenVT = Subtarget.getXLenVT();
9656SDLocDL(N);
9657auto [Mask, VL] =getDefaultVLOps(OpVT, ContainerVT,DL, DAG, Subtarget);
9658SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL,DL, XLenVT, Op0, Mask, VL);
9659if (isOneConstant(N->getOperand(2)))
9660return Res;
9661
9662// Convert -1 to VL.
9663SDValue Setcc =
9664 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0,DL, XLenVT),ISD::SETLT);
9665 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9666return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9667}
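// Illustrative note: llvm.experimental.cttz.elts on a mask is lowered to
// vfirst.m (VFIRST_VL). vfirst returns -1 when no mask bit is set, so unless
// the "zero is poison" flag is set, that -1 is replaced with the element
// count, i.e. "not found" maps to VL.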
9668
9669staticinlinevoidpromoteVCIXScalar(constSDValue &Op,
9670SmallVectorImpl<SDValue> &Operands,
9671SelectionDAG &DAG) {
9672constRISCVSubtarget &Subtarget =
9673 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9674
9675bool HasChain =Op.getOpcode() ==ISD::INTRINSIC_VOID ||
9676Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN;
9677unsigned IntNo =Op.getConstantOperandVal(HasChain ? 1 : 0);
9678SDLocDL(Op);
9679
9680constRISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9681 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9682if (!II || !II->hasScalarOperand())
9683return;
9684
9685unsigned SplatOp =II->ScalarOperand + 1;
9686assert(SplatOp <Op.getNumOperands());
9687
9688SDValue &ScalarOp =Operands[SplatOp];
9689MVT OpVT = ScalarOp.getSimpleValueType();
9690MVT XLenVT = Subtarget.getXLenVT();
9691
9692// The code below is partially copied from lowerVectorIntrinsicScalars.
9693// If this isn't a scalar, or its type is XLenVT we're done.
9694if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9695return;
9696
9697// Manually emit promote operation for scalar operation.
9698if (OpVT.bitsLT(XLenVT)) {
9699unsigned ExtOpc =
9700 isa<ConstantSDNode>(ScalarOp) ?ISD::SIGN_EXTEND :ISD::ANY_EXTEND;
9701 ScalarOp = DAG.getNode(ExtOpc,DL, XLenVT, ScalarOp);
9702 }
9703}
9704
9705staticvoidprocessVCIXOperands(SDValue &OrigOp,
9706SmallVectorImpl<SDValue> &Operands,
9707SelectionDAG &DAG) {
9708promoteVCIXScalar(OrigOp,Operands, DAG);
9709constRISCVSubtarget &Subtarget =
9710 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9711for (SDValue &V :Operands) {
9712EVT ValType = V.getValueType();
9713if (ValType.isVector() && ValType.isFloatingPoint()) {
9714MVT InterimIVT =
9715MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9716 ValType.getVectorElementCount());
9717 V = DAG.getBitcast(InterimIVT, V);
9718 }
9719if (ValType.isFixedLengthVector()) {
9720MVT OpContainerVT =getContainerForFixedLengthVector(
9721 DAG, V.getSimpleValueType(), Subtarget);
9722 V =convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9723 }
9724 }
9725}
9726
9727// LMUL * VLEN should be greater than or equal to EGS * SEW
9728staticinlineboolisValidEGW(int EGS,EVT VT,
9729constRISCVSubtarget &Subtarget) {
9730return (Subtarget.getRealMinVLen() *
9731 VT.getSizeInBits().getKnownMinValue()) /RISCV::RVVBitsPerBlock >=
9732 EGS * VT.getScalarSizeInBits();
9733}
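// Worked example (illustrative): with a guaranteed minimum VLEN of 128 and
// VT = nxv2i32 (LMUL=1, known minimum size 64 bits), the check evaluates
//   (128 * 64) / 64 = 128  >=  EGS * 32
// so EGS = 4 (a 128-bit element group) is accepted, while EGS = 8 (256 bits)
// fails and would need a larger LMUL or VLEN.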
9734
9735SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValueOp,
9736SelectionDAG &DAG) const{
9737unsigned IntNo =Op.getConstantOperandVal(0);
9738SDLocDL(Op);
9739MVT XLenVT = Subtarget.getXLenVT();
9740
9741switch (IntNo) {
9742default:
9743break;// Don't custom lower most intrinsics.
9744case Intrinsic::riscv_tuple_insert: {
9745SDValue Vec =Op.getOperand(1);
9746SDValue SubVec =Op.getOperand(2);
9747SDValueIndex =Op.getOperand(3);
9748
9749return DAG.getNode(RISCVISD::TUPLE_INSERT,DL,Op.getValueType(), Vec,
9750 SubVec, Index);
9751 }
9752case Intrinsic::riscv_tuple_extract: {
9753SDValue Vec =Op.getOperand(1);
9754SDValueIndex =Op.getOperand(2);
9755
9756return DAG.getNode(RISCVISD::TUPLE_EXTRACT,DL,Op.getValueType(), Vec,
9757 Index);
9758 }
9759case Intrinsic::thread_pointer: {
9760EVT PtrVT =getPointerTy(DAG.getDataLayout());
9761return DAG.getRegister(RISCV::X4, PtrVT);
9762 }
9763case Intrinsic::riscv_orc_b:
9764case Intrinsic::riscv_brev8:
9765case Intrinsic::riscv_sha256sig0:
9766case Intrinsic::riscv_sha256sig1:
9767case Intrinsic::riscv_sha256sum0:
9768case Intrinsic::riscv_sha256sum1:
9769case Intrinsic::riscv_sm3p0:
9770case Intrinsic::riscv_sm3p1: {
9771unsigned Opc;
9772switch (IntNo) {
9773case Intrinsic::riscv_orc_b: Opc =RISCVISD::ORC_B;break;
9774case Intrinsic::riscv_brev8: Opc =RISCVISD::BREV8;break;
9775case Intrinsic::riscv_sha256sig0: Opc =RISCVISD::SHA256SIG0;break;
9776case Intrinsic::riscv_sha256sig1: Opc =RISCVISD::SHA256SIG1;break;
9777case Intrinsic::riscv_sha256sum0: Opc =RISCVISD::SHA256SUM0;break;
9778case Intrinsic::riscv_sha256sum1: Opc =RISCVISD::SHA256SUM1;break;
9779case Intrinsic::riscv_sm3p0: Opc =RISCVISD::SM3P0;break;
9780case Intrinsic::riscv_sm3p1: Opc =RISCVISD::SM3P1;break;
9781 }
9782
9783return DAG.getNode(Opc,DL, XLenVT,Op.getOperand(1));
9784 }
9785case Intrinsic::riscv_sm4ks:
9786case Intrinsic::riscv_sm4ed: {
9787unsigned Opc =
9788 IntNo == Intrinsic::riscv_sm4ks ?RISCVISD::SM4KS :RISCVISD::SM4ED;
9789
9790return DAG.getNode(Opc,DL, XLenVT,Op.getOperand(1),Op.getOperand(2),
9791Op.getOperand(3));
9792 }
9793case Intrinsic::riscv_zip:
9794case Intrinsic::riscv_unzip: {
9795unsigned Opc =
9796 IntNo == Intrinsic::riscv_zip ?RISCVISD::ZIP :RISCVISD::UNZIP;
9797return DAG.getNode(Opc,DL, XLenVT,Op.getOperand(1));
9798 }
9799case Intrinsic::riscv_mopr:
9800return DAG.getNode(RISCVISD::MOPR,DL, XLenVT,Op.getOperand(1),
9801Op.getOperand(2));
9802
9803case Intrinsic::riscv_moprr: {
9804return DAG.getNode(RISCVISD::MOPRR,DL, XLenVT,Op.getOperand(1),
9805Op.getOperand(2),Op.getOperand(3));
9806 }
9807case Intrinsic::riscv_clmul:
9808return DAG.getNode(RISCVISD::CLMUL,DL, XLenVT,Op.getOperand(1),
9809Op.getOperand(2));
9810case Intrinsic::riscv_clmulh:
9811case Intrinsic::riscv_clmulr: {
9812unsigned Opc =
9813 IntNo == Intrinsic::riscv_clmulh ?RISCVISD::CLMULH :RISCVISD::CLMULR;
9814return DAG.getNode(Opc,DL, XLenVT,Op.getOperand(1),Op.getOperand(2));
9815 }
9816case Intrinsic::experimental_get_vector_length:
9817returnlowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9818case Intrinsic::experimental_cttz_elts:
9819returnlowerCttzElts(Op.getNode(), DAG, Subtarget);
9820case Intrinsic::riscv_vmv_x_s: {
9821SDValue Res = DAG.getNode(RISCVISD::VMV_X_S,DL, XLenVT,Op.getOperand(1));
9822return DAG.getNode(ISD::TRUNCATE,DL,Op.getValueType(), Res);
9823 }
9824case Intrinsic::riscv_vfmv_f_s:
9825return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL,Op.getValueType(),
9826Op.getOperand(1), DAG.getVectorIdxConstant(0,DL));
9827case Intrinsic::riscv_vmv_v_x:
9828returnlowerScalarSplat(Op.getOperand(1),Op.getOperand(2),
9829Op.getOperand(3),Op.getSimpleValueType(),DL, DAG,
9830 Subtarget);
9831case Intrinsic::riscv_vfmv_v_f:
9832return DAG.getNode(RISCVISD::VFMV_V_F_VL,DL,Op.getValueType(),
9833Op.getOperand(1),Op.getOperand(2),Op.getOperand(3));
9834case Intrinsic::riscv_vmv_s_x: {
9835SDValueScalar =Op.getOperand(2);
9836
9837if (Scalar.getValueType().bitsLE(XLenVT)) {
9838Scalar = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT, Scalar);
9839return DAG.getNode(RISCVISD::VMV_S_X_VL,DL,Op.getValueType(),
9840Op.getOperand(1), Scalar,Op.getOperand(3));
9841 }
9842
9843assert(Scalar.getValueType() == MVT::i64 &&"Unexpected scalar VT!");
9844
9845// This is an i64 value that lives in two scalar registers. We have to
9846// insert this in a convoluted way. First we build vXi64 splat containing
9847// the two values that we assemble using some bit math. Next we'll use
9848// vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9849// to merge element 0 from our splat into the source vector.
9850// FIXME: This is probably not the best way to do this, but it is
9851// consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9852// point.
9853// sw lo, (a0)
9854// sw hi, 4(a0)
9855// vlse vX, (a0)
9856//
9857// vid.v vVid
9858// vmseq.vx mMask, vVid, 0
9859// vmerge.vvm vDest, vSrc, vVal, mMask
9860MVT VT =Op.getSimpleValueType();
9861SDValue Vec =Op.getOperand(1);
9862SDValue VL =getVLOperand(Op);
9863
9864SDValue SplattedVal =splatSplitI64WithVL(DL, VT,SDValue(), Scalar, VL, DAG);
9865if (Op.getOperand(1).isUndef())
9866return SplattedVal;
9867SDValue SplattedIdx =
9868 DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, DAG.getUNDEF(VT),
9869 DAG.getConstant(0,DL, MVT::i32), VL);
9870
9871MVT MaskVT =getMaskTypeFor(VT);
9872SDValueMask =getAllOnesMask(VT, VL,DL, DAG);
9873SDValue VID = DAG.getNode(RISCVISD::VID_VL,DL, VT, Mask, VL);
9874SDValue SelectCond =
9875 DAG.getNode(RISCVISD::SETCC_VL,DL, MaskVT,
9876 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9877 DAG.getUNDEF(MaskVT), Mask, VL});
9878return DAG.getNode(RISCVISD::VMERGE_VL,DL, VT, SelectCond, SplattedVal,
9879 Vec, DAG.getUNDEF(VT), VL);
9880 }
9881case Intrinsic::riscv_vfmv_s_f:
9882return DAG.getNode(RISCVISD::VFMV_S_F_VL,DL,Op.getSimpleValueType(),
9883Op.getOperand(1),Op.getOperand(2),Op.getOperand(3));
9884// EGS * EEW >= 128 bits
9885case Intrinsic::riscv_vaesdf_vv:
9886case Intrinsic::riscv_vaesdf_vs:
9887case Intrinsic::riscv_vaesdm_vv:
9888case Intrinsic::riscv_vaesdm_vs:
9889case Intrinsic::riscv_vaesef_vv:
9890case Intrinsic::riscv_vaesef_vs:
9891case Intrinsic::riscv_vaesem_vv:
9892case Intrinsic::riscv_vaesem_vs:
9893case Intrinsic::riscv_vaeskf1:
9894case Intrinsic::riscv_vaeskf2:
9895case Intrinsic::riscv_vaesz_vs:
9896case Intrinsic::riscv_vsm4k:
9897case Intrinsic::riscv_vsm4r_vv:
9898case Intrinsic::riscv_vsm4r_vs: {
9899if (!isValidEGW(4,Op.getSimpleValueType(), Subtarget) ||
9900 !isValidEGW(4,Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9901 !isValidEGW(4,Op->getOperand(2).getSimpleValueType(), Subtarget))
9902report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9903returnOp;
9904 }
9905// EGS * EEW >= 256 bits
9906case Intrinsic::riscv_vsm3c:
9907case Intrinsic::riscv_vsm3me: {
9908if (!isValidEGW(8,Op.getSimpleValueType(), Subtarget) ||
9909 !isValidEGW(8,Op->getOperand(1).getSimpleValueType(), Subtarget))
9910report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9911returnOp;
9912 }
9913// zvknha(SEW=32)/zvknhb(SEW=[32|64])
9914case Intrinsic::riscv_vsha2ch:
9915case Intrinsic::riscv_vsha2cl:
9916case Intrinsic::riscv_vsha2ms: {
9917if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9918 !Subtarget.hasStdExtZvknhb())
9919report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9920if (!isValidEGW(4,Op.getSimpleValueType(), Subtarget) ||
9921 !isValidEGW(4,Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9922 !isValidEGW(4,Op->getOperand(2).getSimpleValueType(), Subtarget))
9923report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9924returnOp;
9925 }
9926case Intrinsic::riscv_sf_vc_v_x:
9927case Intrinsic::riscv_sf_vc_v_i:
9928case Intrinsic::riscv_sf_vc_v_xv:
9929case Intrinsic::riscv_sf_vc_v_iv:
9930case Intrinsic::riscv_sf_vc_v_vv:
9931case Intrinsic::riscv_sf_vc_v_fv:
9932case Intrinsic::riscv_sf_vc_v_xvv:
9933case Intrinsic::riscv_sf_vc_v_ivv:
9934case Intrinsic::riscv_sf_vc_v_vvv:
9935case Intrinsic::riscv_sf_vc_v_fvv:
9936case Intrinsic::riscv_sf_vc_v_xvw:
9937case Intrinsic::riscv_sf_vc_v_ivw:
9938case Intrinsic::riscv_sf_vc_v_vvw:
9939case Intrinsic::riscv_sf_vc_v_fvw: {
9940MVT VT =Op.getSimpleValueType();
9941
9942SmallVector<SDValue>Operands{Op->op_values()};
9943processVCIXOperands(Op,Operands, DAG);
9944
9945MVT RetVT = VT;
9946if (VT.isFixedLengthVector())
9947 RetVT =getContainerForFixedLengthVector(VT);
9948elseif (VT.isFloatingPoint())
9949 RetVT =MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9950 VT.getVectorElementCount());
9951
9952SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN,DL, RetVT,Operands);
9953
9954if (VT.isFixedLengthVector())
9955 NewNode =convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9956elseif (VT.isFloatingPoint())
9957 NewNode = DAG.getBitcast(VT, NewNode);
9958
9959if (Op == NewNode)
9960break;
9961
9962return NewNode;
9963 }
9964 }
9965
9966returnlowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9967}
9968
9969staticinlineSDValuegetVCIXISDNodeWCHAIN(SDValue &Op,SelectionDAG &DAG,
9970unsignedType) {
9971SDLocDL(Op);
9972SmallVector<SDValue>Operands{Op->op_values()};
9973Operands.erase(Operands.begin() + 1);
9974
9975constRISCVSubtarget &Subtarget =
9976 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9977MVT VT =Op.getSimpleValueType();
9978MVT RetVT = VT;
9979MVT FloatVT = VT;
9980
9981if (VT.isFloatingPoint()) {
9982 RetVT =MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9983 VT.getVectorElementCount());
9984 FloatVT = RetVT;
9985 }
9986if (VT.isFixedLengthVector())
9987 RetVT =getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
9988 Subtarget);
9989
9990processVCIXOperands(Op,Operands, DAG);
9991
9992SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9993SDValue NewNode = DAG.getNode(Type,DL, VTs,Operands);
9994SDValue Chain = NewNode.getValue(1);
9995
9996if (VT.isFixedLengthVector())
9997 NewNode =convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9998if (VT.isFloatingPoint())
9999 NewNode = DAG.getBitcast(VT, NewNode);
10000
10001 NewNode = DAG.getMergeValues({NewNode, Chain},DL);
10002
10003return NewNode;
10004}
10005
10006staticinlineSDValuegetVCIXISDNodeVOID(SDValue &Op,SelectionDAG &DAG,
10007unsignedType) {
10008SmallVector<SDValue>Operands{Op->op_values()};
10009Operands.erase(Operands.begin() + 1);
10010processVCIXOperands(Op,Operands, DAG);
10011
10012return DAG.getNode(Type,SDLoc(Op),Op.getValueType(),Operands);
10013}
10014
10015SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValueOp,
10016SelectionDAG &DAG) const{
10017unsigned IntNo =Op.getConstantOperandVal(1);
10018switch (IntNo) {
10019default:
10020break;
10021case Intrinsic::riscv_seg2_load:
10022case Intrinsic::riscv_seg3_load:
10023case Intrinsic::riscv_seg4_load:
10024case Intrinsic::riscv_seg5_load:
10025case Intrinsic::riscv_seg6_load:
10026case Intrinsic::riscv_seg7_load:
10027case Intrinsic::riscv_seg8_load: {
10028SDLocDL(Op);
10029staticconstIntrinsic::ID VlsegInts[7] = {
10030 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10031 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10032 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10033 Intrinsic::riscv_vlseg8};
10034unsigned NF =Op->getNumValues() - 1;
10035assert(NF >= 2 && NF <= 8 &&"Unexpected seg number");
10036MVT XLenVT = Subtarget.getXLenVT();
10037MVT VT =Op->getSimpleValueType(0);
10038MVT ContainerVT =getContainerForFixedLengthVector(VT);
10039unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10040 ContainerVT.getScalarSizeInBits();
10041EVT VecTupTy =MVT::getRISCVVectorTupleVT(Sz, NF);
10042
10043SDValue VL = DAG.getConstant(VT.getVectorNumElements(),DL, XLenVT);
10044SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2],DL, XLenVT);
10045auto *Load = cast<MemIntrinsicSDNode>(Op);
10046
10047SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10048SDValue Ops[] = {
10049Load->getChain(),
10050 IntID,
10051 DAG.getUNDEF(VecTupTy),
10052Op.getOperand(2),
10053 VL,
10054 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()),DL, XLenVT)};
10055SDValueResult =
10056 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,DL, VTs, Ops,
10057Load->getMemoryVT(),Load->getMemOperand());
10058SmallVector<SDValue, 9>Results;
10059for (unsignedint RetIdx = 0; RetIdx < NF; RetIdx++) {
10060SDValue SubVec =
10061 DAG.getNode(RISCVISD::TUPLE_EXTRACT,DL, ContainerVT,
10062Result.getValue(0), DAG.getVectorIdxConstant(RetIdx,DL));
10063Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10064 }
10065Results.push_back(Result.getValue(1));
10066return DAG.getMergeValues(Results,DL);
10067 }
10068case Intrinsic::riscv_sf_vc_v_x_se:
10069returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_X_SE);
10070case Intrinsic::riscv_sf_vc_v_i_se:
10071returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_I_SE);
10072case Intrinsic::riscv_sf_vc_v_xv_se:
10073returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_XV_SE);
10074case Intrinsic::riscv_sf_vc_v_iv_se:
10075returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_IV_SE);
10076case Intrinsic::riscv_sf_vc_v_vv_se:
10077returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_VV_SE);
10078case Intrinsic::riscv_sf_vc_v_fv_se:
10079returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_FV_SE);
10080case Intrinsic::riscv_sf_vc_v_xvv_se:
10081returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_XVV_SE);
10082case Intrinsic::riscv_sf_vc_v_ivv_se:
10083returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_IVV_SE);
10084case Intrinsic::riscv_sf_vc_v_vvv_se:
10085returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_VVV_SE);
10086case Intrinsic::riscv_sf_vc_v_fvv_se:
10087returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_FVV_SE);
10088case Intrinsic::riscv_sf_vc_v_xvw_se:
10089returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_XVW_SE);
10090case Intrinsic::riscv_sf_vc_v_ivw_se:
10091returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_IVW_SE);
10092case Intrinsic::riscv_sf_vc_v_vvw_se:
10093returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_VVW_SE);
10094case Intrinsic::riscv_sf_vc_v_fvw_se:
10095returngetVCIXISDNodeWCHAIN(Op, DAG,RISCVISD::SF_VC_V_FVW_SE);
10096 }
10097
10098returnlowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10099}
10100
10101SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValueOp,
10102SelectionDAG &DAG) const{
10103unsigned IntNo =Op.getConstantOperandVal(1);
10104switch (IntNo) {
10105default:
10106break;
10107case Intrinsic::riscv_seg2_store:
10108case Intrinsic::riscv_seg3_store:
10109case Intrinsic::riscv_seg4_store:
10110case Intrinsic::riscv_seg5_store:
10111case Intrinsic::riscv_seg6_store:
10112case Intrinsic::riscv_seg7_store:
10113case Intrinsic::riscv_seg8_store: {
10114SDLocDL(Op);
10115staticconstIntrinsic::ID VssegInts[] = {
10116 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10117 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10118 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10119 Intrinsic::riscv_vsseg8};
10120// Operands are (chain, int_id, vec*, ptr, vl)
10121unsigned NF =Op->getNumOperands() - 4;
10122assert(NF >= 2 && NF <= 8 &&"Unexpected seg number");
10123MVT XLenVT = Subtarget.getXLenVT();
10124MVT VT =Op->getOperand(2).getSimpleValueType();
10125MVT ContainerVT =getContainerForFixedLengthVector(VT);
10126unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10127 ContainerVT.getScalarSizeInBits();
10128EVT VecTupTy =MVT::getRISCVVectorTupleVT(Sz, NF);
10129
10130SDValue VL = DAG.getConstant(VT.getVectorNumElements(),DL, XLenVT);
10131SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2],DL, XLenVT);
10132SDValuePtr =Op->getOperand(NF + 2);
10133
10134auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10135
10136SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10137for (unsigned i = 0; i < NF; i++)
10138 StoredVal = DAG.getNode(
10139RISCVISD::TUPLE_INSERT,DL, VecTupTy, StoredVal,
10140convertToScalableVector(
10141 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10142 DAG.getVectorIdxConstant(i,DL));
10143
10144SDValue Ops[] = {
10145 FixedIntrinsic->getChain(),
10146 IntID,
10147 StoredVal,
10148Ptr,
10149 VL,
10150 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()),DL, XLenVT)};
10151
10152return DAG.getMemIntrinsicNode(
10153ISD::INTRINSIC_VOID,DL, DAG.getVTList(MVT::Other), Ops,
10154 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10155 }
10156case Intrinsic::riscv_sf_vc_xv_se:
10157returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_XV_SE);
10158case Intrinsic::riscv_sf_vc_iv_se:
10159returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_IV_SE);
10160case Intrinsic::riscv_sf_vc_vv_se:
10161returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_VV_SE);
10162case Intrinsic::riscv_sf_vc_fv_se:
10163returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_FV_SE);
10164case Intrinsic::riscv_sf_vc_xvv_se:
10165returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_XVV_SE);
10166case Intrinsic::riscv_sf_vc_ivv_se:
10167returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_IVV_SE);
10168case Intrinsic::riscv_sf_vc_vvv_se:
10169returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_VVV_SE);
10170case Intrinsic::riscv_sf_vc_fvv_se:
10171returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_FVV_SE);
10172case Intrinsic::riscv_sf_vc_xvw_se:
10173returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_XVW_SE);
10174case Intrinsic::riscv_sf_vc_ivw_se:
10175returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_IVW_SE);
10176case Intrinsic::riscv_sf_vc_vvw_se:
10177returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_VVW_SE);
10178case Intrinsic::riscv_sf_vc_fvw_se:
10179returngetVCIXISDNodeVOID(Op, DAG,RISCVISD::SF_VC_FVW_SE);
10180 }
10181
10182returnlowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10183}
10184
10185staticunsignedgetRVVReductionOp(unsigned ISDOpcode) {
10186switch (ISDOpcode) {
10187default:
10188llvm_unreachable("Unhandled reduction");
10189case ISD::VP_REDUCE_ADD:
10190caseISD::VECREDUCE_ADD:
10191returnRISCVISD::VECREDUCE_ADD_VL;
10192case ISD::VP_REDUCE_UMAX:
10193caseISD::VECREDUCE_UMAX:
10194returnRISCVISD::VECREDUCE_UMAX_VL;
10195case ISD::VP_REDUCE_SMAX:
10196caseISD::VECREDUCE_SMAX:
10197returnRISCVISD::VECREDUCE_SMAX_VL;
10198case ISD::VP_REDUCE_UMIN:
10199caseISD::VECREDUCE_UMIN:
10200returnRISCVISD::VECREDUCE_UMIN_VL;
10201case ISD::VP_REDUCE_SMIN:
10202caseISD::VECREDUCE_SMIN:
10203returnRISCVISD::VECREDUCE_SMIN_VL;
10204case ISD::VP_REDUCE_AND:
10205caseISD::VECREDUCE_AND:
10206returnRISCVISD::VECREDUCE_AND_VL;
10207case ISD::VP_REDUCE_OR:
10208caseISD::VECREDUCE_OR:
10209returnRISCVISD::VECREDUCE_OR_VL;
10210case ISD::VP_REDUCE_XOR:
10211caseISD::VECREDUCE_XOR:
10212returnRISCVISD::VECREDUCE_XOR_VL;
10213case ISD::VP_REDUCE_FADD:
10214returnRISCVISD::VECREDUCE_FADD_VL;
10215case ISD::VP_REDUCE_SEQ_FADD:
10216returnRISCVISD::VECREDUCE_SEQ_FADD_VL;
10217case ISD::VP_REDUCE_FMAX:
10218case ISD::VP_REDUCE_FMAXIMUM:
10219returnRISCVISD::VECREDUCE_FMAX_VL;
10220case ISD::VP_REDUCE_FMIN:
10221case ISD::VP_REDUCE_FMINIMUM:
10222returnRISCVISD::VECREDUCE_FMIN_VL;
10223 }
10224
10225}
10226
10227SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValueOp,
10228SelectionDAG &DAG,
10229bool IsVP) const{
10230SDLocDL(Op);
10231SDValue Vec =Op.getOperand(IsVP ? 1 : 0);
10232MVT VecVT = Vec.getSimpleValueType();
10233assert((Op.getOpcode() ==ISD::VECREDUCE_AND ||
10234Op.getOpcode() ==ISD::VECREDUCE_OR ||
10235Op.getOpcode() ==ISD::VECREDUCE_XOR ||
10236Op.getOpcode() == ISD::VP_REDUCE_AND ||
10237Op.getOpcode() == ISD::VP_REDUCE_OR ||
10238Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10239"Unexpected reduction lowering");
10240
10241MVT XLenVT = Subtarget.getXLenVT();
10242
10243MVT ContainerVT = VecVT;
10244if (VecVT.isFixedLengthVector()) {
10245 ContainerVT =getContainerForFixedLengthVector(VecVT);
10246 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10247 }
10248
10249SDValueMask, VL;
10250if (IsVP) {
10251Mask =Op.getOperand(2);
10252 VL =Op.getOperand(3);
10253 }else {
10254 std::tie(Mask, VL) =
10255getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
10256 }
10257
10258ISD::CondCodeCC;
10259switch (Op.getOpcode()) {
10260default:
10261llvm_unreachable("Unhandled reduction");
10262caseISD::VECREDUCE_AND:
10263case ISD::VP_REDUCE_AND: {
10264// vcpop ~x == 0
10265SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL,DL, ContainerVT, VL);
10266if (IsVP || VecVT.isFixedLengthVector())
10267 Vec = DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Vec, TrueMask, VL);
10268else
10269 Vec = DAG.getNode(ISD::XOR,DL, ContainerVT, Vec, TrueMask);
10270 Vec = DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, Vec, Mask, VL);
10271CC =ISD::SETEQ;
10272break;
10273 }
10274caseISD::VECREDUCE_OR:
10275case ISD::VP_REDUCE_OR:
10276// vcpop x != 0
10277 Vec = DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, Vec, Mask, VL);
10278CC =ISD::SETNE;
10279break;
10280caseISD::VECREDUCE_XOR:
10281case ISD::VP_REDUCE_XOR: {
10282// ((vcpop x) & 1) != 0
10283SDValue One = DAG.getConstant(1,DL, XLenVT);
10284 Vec = DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, Vec, Mask, VL);
10285 Vec = DAG.getNode(ISD::AND,DL, XLenVT, Vec, One);
10286CC =ISD::SETNE;
10287break;
10288 }
10289 }
10290
10291SDValueZero = DAG.getConstant(0,DL, XLenVT);
10292SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero,CC);
10293 SetCC = DAG.getNode(ISD::TRUNCATE,DL,Op.getValueType(), SetCC);
10294
10295if (!IsVP)
10296return SetCC;
10297
10298// Now include the start value in the operation.
10299// Note that we must return the start value when no elements are operated
10300// upon. The vcpop instructions we've emitted in each case above will return
10301// 0 for an inactive vector, and so we've already received the neutral value:
10302// AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10303// can simply include the start value.
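// For example (illustrative), vp.reduce.and with EVL=0 yields vcpop == 0, the
// setcc (0 == 0) produces 1, and ANDing that with the start value returns the
// start value unchanged, matching the required VP semantics.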
10304unsigned BaseOpc =ISD::getVecReduceBaseOpcode(Op.getOpcode());
10305return DAG.getNode(BaseOpc,DL,Op.getValueType(), SetCC,Op.getOperand(0));
10306}
10307
10308staticboolisNonZeroAVL(SDValue AVL) {
10309auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10310auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10311return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10312 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10313}
10314
10315/// Helper to lower a reduction sequence of the form:
10316/// scalar = reduce_op vec, scalar_start
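/// For an integer add reduction this typically emits, roughly:
///   vmv.s.x    v9, a0      ; scalar_start into element 0
///   vredsum.vs v9, v8, v9  ; reduce vec together with the start value
///   vmv.x.s    a0, v9      ; move the scalar result out
/// (vsetvli toggles omitted; the exact sequence depends on VL and types.)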
10317staticSDValuelowerReductionSeq(unsigned RVVOpcode,MVT ResVT,
10318SDValue StartValue,SDValue Vec,SDValue Mask,
10319SDValue VL,constSDLoc &DL,SelectionDAG &DAG,
10320constRISCVSubtarget &Subtarget) {
10321constMVT VecVT = Vec.getSimpleValueType();
10322constMVT M1VT =getLMUL1VT(VecVT);
10323constMVT XLenVT = Subtarget.getXLenVT();
10324constbool NonZeroAVL =isNonZeroAVL(VL);
10325
10326// The reduction needs an LMUL1 input; do the splat at either LMUL1
10327// or the original VT if fractional.
10328auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10329// We reuse the VL of the reduction to reduce vsetvli toggles if we can
10330// prove it is non-zero. For the AVL=0 case, we need the scalar to
10331// be the result of the reduction operation.
10332auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1,DL, XLenVT);
10333SDValue InitialValue =lowerScalarInsert(StartValue, InnerVL, InnerVT,DL,
10334 DAG, Subtarget);
10335if (M1VT != InnerVT)
10336 InitialValue =
10337 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, M1VT, DAG.getUNDEF(M1VT),
10338 InitialValue, DAG.getVectorIdxConstant(0,DL));
10339SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10340SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC,DL, XLenVT);
10341SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10342SDValueReduction = DAG.getNode(RVVOpcode,DL, M1VT, Ops);
10343return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, ResVT,Reduction,
10344 DAG.getVectorIdxConstant(0,DL));
10345}
10346
10347SDValue RISCVTargetLowering::lowerVECREDUCE(SDValueOp,
10348SelectionDAG &DAG) const{
10349SDLocDL(Op);
10350SDValue Vec =Op.getOperand(0);
10351EVT VecEVT = Vec.getValueType();
10352
10353unsigned BaseOpc =ISD::getVecReduceBaseOpcode(Op.getOpcode());
10354
10355// Due to ordering in legalize types we may have a vector type that needs to
10356// be split. Do that manually so we can get down to a legal type.
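// For example (illustrative), with Zvl128b and the default fixed-length LMUL
// limit a v32i64 add reduction is split into two v16i64 halves that are added
// together; the loop repeats until the remaining vector type is legal.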
10357while (getTypeAction(*DAG.getContext(), VecEVT) ==
10358TargetLowering::TypeSplitVector) {
10359auto [Lo,Hi] = DAG.SplitVector(Vec,DL);
10360 VecEVT =Lo.getValueType();
10361 Vec = DAG.getNode(BaseOpc,DL, VecEVT,Lo,Hi);
10362 }
10363
10364// TODO: The type may need to be widened rather than split. Or widened before
10365// it can be split.
10366if (!isTypeLegal(VecEVT))
10367returnSDValue();
10368
10369MVT VecVT = VecEVT.getSimpleVT();
10370MVT VecEltVT = VecVT.getVectorElementType();
10371unsigned RVVOpcode =getRVVReductionOp(Op.getOpcode());
10372
10373MVT ContainerVT = VecVT;
10374if (VecVT.isFixedLengthVector()) {
10375 ContainerVT =getContainerForFixedLengthVector(VecVT);
10376 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10377 }
10378
10379auto [Mask, VL] =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
10380
10381SDValue StartV = DAG.getNeutralElement(BaseOpc,DL, VecEltVT,SDNodeFlags());
10382switch (BaseOpc) {
10383caseISD::AND:
10384caseISD::OR:
10385caseISD::UMAX:
10386caseISD::UMIN:
10387caseISD::SMAX:
10388caseISD::SMIN:
10389 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, VecEltVT, Vec,
10390 DAG.getVectorIdxConstant(0,DL));
10391 }
10392returnlowerReductionSeq(RVVOpcode,Op.getSimpleValueType(), StartV, Vec,
10393 Mask, VL,DL, DAG, Subtarget);
10394}
10395
10396// Given a reduction op, this function returns the matching reduction opcode,
10397// the vector SDValue and the scalar SDValue required to lower this to a
10398// RISCVISD node.
10399static std::tuple<unsigned, SDValue, SDValue>
10400getRVVFPReductionOpAndOperands(SDValueOp,SelectionDAG &DAG,EVT EltVT,
10401constRISCVSubtarget &Subtarget) {
10402SDLocDL(Op);
10403auto Flags =Op->getFlags();
10404unsigned Opcode =Op.getOpcode();
10405switch (Opcode) {
10406default:
10407llvm_unreachable("Unhandled reduction");
10408caseISD::VECREDUCE_FADD: {
10409// Use positive zero if we can. It is cheaper to materialize.
10410SDValue Zero =
10411 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0,DL, EltVT);
10412return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL,Op.getOperand(0), Zero);
10413 }
10414caseISD::VECREDUCE_SEQ_FADD:
10415return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL,Op.getOperand(1),
10416Op.getOperand(0));
10417caseISD::VECREDUCE_FMINIMUM:
10418caseISD::VECREDUCE_FMAXIMUM:
10419caseISD::VECREDUCE_FMIN:
10420caseISD::VECREDUCE_FMAX: {
10421SDValue Front =
10422 DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, EltVT,Op.getOperand(0),
10423 DAG.getVectorIdxConstant(0,DL));
10424unsigned RVVOpc =
10425 (Opcode ==ISD::VECREDUCE_FMIN || Opcode ==ISD::VECREDUCE_FMINIMUM)
10426 ?RISCVISD::VECREDUCE_FMIN_VL
10427 :RISCVISD::VECREDUCE_FMAX_VL;
10428return std::make_tuple(RVVOpc,Op.getOperand(0), Front);
10429 }
10430 }
10431}
10432
10433SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValueOp,
10434SelectionDAG &DAG) const{
10435SDLocDL(Op);
10436MVT VecEltVT =Op.getSimpleValueType();
10437
10438unsigned RVVOpcode;
10439SDValue VectorVal, ScalarVal;
10440 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10441getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10442MVT VecVT = VectorVal.getSimpleValueType();
10443
10444MVT ContainerVT = VecVT;
10445if (VecVT.isFixedLengthVector()) {
10446 ContainerVT =getContainerForFixedLengthVector(VecVT);
10447 VectorVal =convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10448 }
10449
10450MVT ResVT =Op.getSimpleValueType();
10451auto [Mask, VL] =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
10452SDValue Res =lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10453 VL,DL, DAG, Subtarget);
10454if (Op.getOpcode() !=ISD::VECREDUCE_FMINIMUM &&
10455Op.getOpcode() !=ISD::VECREDUCE_FMAXIMUM)
10456return Res;
10457
10458if (Op->getFlags().hasNoNaNs())
10459return Res;
10460
10461// Force the output to NaN if any element is NaN.
10462SDValue IsNan =
10463 DAG.getNode(RISCVISD::SETCC_VL,DL,Mask.getValueType(),
10464 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10465 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10466MVT XLenVT = Subtarget.getXLenVT();
10467SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, IsNan, Mask, VL);
10468SDValueNoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10469 DAG.getConstant(0,DL, XLenVT),ISD::SETEQ);
10470return DAG.getSelect(
10471DL, ResVT, NoNaNs, Res,
10472 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()),DL, ResVT));
10473}
10474
10475SDValue RISCVTargetLowering::lowerVPREDUCE(SDValueOp,
10476SelectionDAG &DAG) const{
10477SDLocDL(Op);
10478unsigned Opc =Op.getOpcode();
10479SDValue Start =Op.getOperand(0);
10480SDValue Vec =Op.getOperand(1);
10481EVT VecEVT = Vec.getValueType();
10482MVT XLenVT = Subtarget.getXLenVT();
10483
10484// TODO: The type may need to be widened rather than split. Or widened before
10485// it can be split.
10486if (!isTypeLegal(VecEVT))
10487returnSDValue();
10488
10489MVT VecVT = VecEVT.getSimpleVT();
10490unsigned RVVOpcode =getRVVReductionOp(Opc);
10491
10492if (VecVT.isFixedLengthVector()) {
10493auto ContainerVT =getContainerForFixedLengthVector(VecVT);
10494 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10495 }
10496
10497SDValue VL =Op.getOperand(3);
10498SDValueMask =Op.getOperand(2);
10499SDValue Res =
10500lowerReductionSeq(RVVOpcode,Op.getSimpleValueType(),Op.getOperand(0),
10501 Vec, Mask, VL,DL, DAG, Subtarget);
10502if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10503Op->getFlags().hasNoNaNs())
10504return Res;
10505
10506// Propagate NaNs.
10507MVT PredVT =getMaskTypeFor(Vec.getSimpleValueType());
10508// Check if any of the elements in Vec is NaN.
10509SDValue IsNaN = DAG.getNode(
10510RISCVISD::SETCC_VL,DL, PredVT,
10511 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10512SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, IsNaN, Mask, VL);
10513// Check if the start value is NaN.
10514SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start,ISD::SETUO);
10515 VCPop = DAG.getNode(ISD::OR,DL, XLenVT, VCPop, StartIsNaN);
10516SDValueNoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10517 DAG.getConstant(0,DL, XLenVT),ISD::SETEQ);
10518MVT ResVT = Res.getSimpleValueType();
10519return DAG.getSelect(
10520DL, ResVT, NoNaNs, Res,
10521 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()),DL, ResVT));
10522}
10523
10524SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValueOp,
10525SelectionDAG &DAG) const{
10526SDValue Vec =Op.getOperand(0);
10527SDValue SubVec =Op.getOperand(1);
10528MVT VecVT = Vec.getSimpleValueType();
10529MVT SubVecVT = SubVec.getSimpleValueType();
10530
10531SDLocDL(Op);
10532MVT XLenVT = Subtarget.getXLenVT();
10533unsigned OrigIdx =Op.getConstantOperandVal(2);
10534constRISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10535
10536if (OrigIdx == 0 && Vec.isUndef())
10537returnOp;
10538
10539// We don't have the ability to slide mask vectors up indexed by their i1
10540// elements; the smallest we can do is i8. Often we are able to bitcast to
10541// equivalent i8 vectors. Note that when inserting a fixed-length vector
10542// into a scalable one, we might not necessarily have enough scalable
10543// elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
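// For example, inserting a v16i1 subvector at index 16 into nxv32i1 can be
// recast as inserting a v2i8 subvector at index 2 into nxv4i8.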
10544if (SubVecVT.getVectorElementType() == MVT::i1) {
10545if (VecVT.getVectorMinNumElements() >= 8 &&
10546 SubVecVT.getVectorMinNumElements() >= 8) {
10547assert(OrigIdx % 8 == 0 &&"Invalid index");
10548assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10549 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10550"Unexpected mask vector lowering");
10551 OrigIdx /= 8;
10552 SubVecVT =
10553MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10554 SubVecVT.isScalableVector());
10555 VecVT =MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10556 VecVT.isScalableVector());
10557 Vec = DAG.getBitcast(VecVT, Vec);
10558 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10559 }else {
10560// We can't slide this mask vector up indexed by its i1 elements.
10561// This poses a problem when we wish to insert a scalable vector which
10562// can't be re-expressed as a larger type. Just choose the slow path and
10563// extend to a larger type, then truncate back down.
10564MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10565MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10566 Vec = DAG.getNode(ISD::ZERO_EXTEND,DL, ExtVecVT, Vec);
10567 SubVec = DAG.getNode(ISD::ZERO_EXTEND,DL, ExtSubVecVT, SubVec);
10568 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ExtVecVT, Vec, SubVec,
10569Op.getOperand(2));
10570SDValue SplatZero = DAG.getConstant(0,DL, ExtVecVT);
10571return DAG.getSetCC(DL, VecVT, Vec, SplatZero,ISD::SETNE);
10572 }
10573 }
10574
10575// If the subvector is a fixed-length type and we don't know VLEN exactly,
10576// we cannot use subregister manipulation to simplify the codegen; we don't
10577// know which register of an LMUL group contains the specific subvector as
10578// we only know the minimum register size. Therefore we must slide the
10579// vector group up the full amount.
10580constauto VLen = Subtarget.getRealVLen();
10581if (SubVecVT.isFixedLengthVector() && !VLen) {
10582MVT ContainerVT = VecVT;
10583if (VecVT.isFixedLengthVector()) {
10584 ContainerVT =getContainerForFixedLengthVector(VecVT);
10585 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10586 }
10587
10588 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ContainerVT,
10589 DAG.getUNDEF(ContainerVT), SubVec,
10590 DAG.getVectorIdxConstant(0,DL));
10591
10592SDValueMask =
10593getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget).first;
10594// Set the vector length to only the number of elements we care about. Note
10595// that for slideup this includes the offset.
10596unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10597SDValue VL = DAG.getConstant(EndIndex,DL, XLenVT);
10598
10599// Use tail agnostic policy if we're inserting over Vec's tail.
10600unsigned Policy =RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10601if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10602 Policy =RISCVII::TAIL_AGNOSTIC;
10603
10604// If we're inserting into the lowest elements, use a tail undisturbed
10605// vmv.v.v.
10606if (OrigIdx == 0) {
10607 SubVec =
10608 DAG.getNode(RISCVISD::VMV_V_V_VL,DL, ContainerVT, Vec, SubVec, VL);
10609 }else {
10610SDValue SlideupAmt = DAG.getConstant(OrigIdx,DL, XLenVT);
10611 SubVec =getVSlideup(DAG, Subtarget,DL, ContainerVT, Vec, SubVec,
10612 SlideupAmt, Mask, VL, Policy);
10613 }
10614
10615if (VecVT.isFixedLengthVector())
10616 SubVec =convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10617return DAG.getBitcast(Op.getValueType(), SubVec);
10618 }
10619
10620MVT ContainerVecVT = VecVT;
10621if (VecVT.isFixedLengthVector()) {
10622 ContainerVecVT =getContainerForFixedLengthVector(VecVT);
10623 Vec =convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10624 }
10625
10626MVT ContainerSubVecVT = SubVecVT;
10627if (SubVecVT.isFixedLengthVector()) {
10628 ContainerSubVecVT =getContainerForFixedLengthVector(SubVecVT);
10629 SubVec =convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10630 }
10631
10632unsigned SubRegIdx;
10633ElementCount RemIdx;
10634// insert_subvector scales the index by vscale if the subvector is scalable,
10635// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10636// we have a fixed length subvector, we need to adjust the index by 1/vscale.
10637if (SubVecVT.isFixedLengthVector()) {
10638assert(VLen);
10639unsigned Vscale = *VLen /RISCV::RVVBitsPerBlock;
10640auto Decompose =
10641RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10642 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale,TRI);
10643 SubRegIdx = Decompose.first;
10644 RemIdx =ElementCount::getFixed((Decompose.second * Vscale) +
10645 (OrigIdx % Vscale));
10646 }else {
10647auto Decompose =
10648RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10649 ContainerVecVT, ContainerSubVecVT, OrigIdx,TRI);
10650 SubRegIdx = Decompose.first;
10651 RemIdx =ElementCount::getScalable(Decompose.second);
10652 }
10653
10654TypeSize VecRegSize =TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10655assert(isPowerOf2_64(
10656 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10657bool ExactlyVecRegSized =
10658 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10659 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10660
10661// 1. If the Idx has been completely eliminated and this subvector's size is
10662// a vector register or a multiple thereof, or the surrounding elements are
10663// undef, then this is a subvector insert which naturally aligns to a vector
10664// register. These can easily be handled using subregister manipulation.
10665// 2. If the subvector isn't an exact multiple of a valid register group size,
10666// then the insertion must preserve the undisturbed elements of the register.
10667// We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10668// vector type (which resolves to a subregister copy), performing a VSLIDEUP
10669// to place the subvector within the vector register, and an INSERT_SUBVECTOR
10670// of that LMUL=1 type back into the larger vector (resolving to another
10671// subregister operation). See below for how our VSLIDEUP works. We go via a
10672// LMUL=1 type to avoid allocating a large register group to hold our
10673// subvector.
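// For example (illustrative), inserting nxv2i32 at index 2 into nxv8i32 lands
// exactly on a vector register boundary (RemIdx == 0) and becomes a plain
// subregister insert (case 1), whereas inserting it at index 1 leaves
// RemIdx == 1 and takes the extract/vslideup/insert path below (case 2).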
10674if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10675if (SubVecVT.isFixedLengthVector()) {
10676// We may get NoSubRegister if inserting at index 0 and the subvec
10677// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10678if (SubRegIdx == RISCV::NoSubRegister) {
10679assert(OrigIdx == 0);
10680returnOp;
10681 }
10682
10683// Use an insert_subvector that will resolve to an insert subreg.
10684assert(VLen);
10685unsigned Vscale = *VLen /RISCV::RVVBitsPerBlock;
10686SDValueInsert =
10687 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ContainerVecVT, Vec, SubVec,
10688 DAG.getConstant(OrigIdx / Vscale,DL, XLenVT));
10689if (VecVT.isFixedLengthVector())
10690Insert =convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10691returnInsert;
10692 }
10693returnOp;
10694 }
10695
10696// VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10697// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10698// (in our case undisturbed). This means we can set up a subvector insertion
10699// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10700// size of the subvector.
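// For example, inserting a 2-element subvector at element 3 uses OFFSET=3 and
// VL=5: elements 0..2 of the destination are left undisturbed, elements 3..4
// receive the subvector, and elements from 5 onwards follow the tail policy.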
10701MVT InterSubVT = ContainerVecVT;
10702SDValue AlignedExtract = Vec;
10703unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10704if (SubVecVT.isFixedLengthVector()) {
10705assert(VLen);
10706 AlignedIdx /= *VLen /RISCV::RVVBitsPerBlock;
10707 }
10708if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10709 InterSubVT =getLMUL1VT(ContainerVecVT);
10710// Extract a subvector equal to the nearest full vector register type. This
10711// should resolve to a EXTRACT_SUBREG instruction.
10712 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, InterSubVT, Vec,
10713 DAG.getVectorIdxConstant(AlignedIdx,DL));
10714 }
10715
10716 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, InterSubVT,
10717 DAG.getUNDEF(InterSubVT), SubVec,
10718 DAG.getVectorIdxConstant(0,DL));
10719
10720auto [Mask, VL] =getDefaultVLOps(VecVT, ContainerVecVT,DL, DAG, Subtarget);
10721
10722ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10723 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10724
10725// Use tail agnostic policy if we're inserting over InterSubVT's tail.
10726unsigned Policy =RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10727if (Subtarget.expandVScale(EndIndex) ==
10728 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10729 Policy =RISCVII::TAIL_AGNOSTIC;
10730
10731// If we're inserting into the lowest elements, use a tail undisturbed
10732// vmv.v.v.
10733if (RemIdx.isZero()) {
10734 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL,DL, InterSubVT, AlignedExtract,
10735 SubVec, VL);
10736 }else {
10737SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10738
10739// Construct the vector length corresponding to RemIdx + length(SubVecVT).
10740 VL = DAG.getNode(ISD::ADD,DL, XLenVT, SlideupAmt, VL);
10741
10742 SubVec =getVSlideup(DAG, Subtarget,DL, InterSubVT, AlignedExtract, SubVec,
10743 SlideupAmt, Mask, VL, Policy);
10744 }
10745
10746// If required, insert this subvector back into the correct vector register.
10747// This should resolve to an INSERT_SUBREG instruction.
10748if (ContainerVecVT.bitsGT(InterSubVT))
10749 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ContainerVecVT, Vec, SubVec,
10750 DAG.getVectorIdxConstant(AlignedIdx,DL));
10751
10752if (VecVT.isFixedLengthVector())
10753 SubVec =convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10754
10755// We might have bitcast from a mask type: cast back to the original type if
10756// required.
10757return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10758}
10759
10760SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValueOp,
10761SelectionDAG &DAG) const{
10762SDValue Vec =Op.getOperand(0);
10763MVT SubVecVT =Op.getSimpleValueType();
10764MVT VecVT = Vec.getSimpleValueType();
10765
10766SDLocDL(Op);
10767MVT XLenVT = Subtarget.getXLenVT();
10768unsigned OrigIdx =Op.getConstantOperandVal(1);
10769constRISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10770
10771// With an index of 0 this is a cast-like subvector, which can be performed
10772// with subregister operations.
10773if (OrigIdx == 0)
10774returnOp;
10775
10776// We don't have the ability to slide mask vectors down indexed by their i1
10777// elements; the smallest we can do is i8. Often we are able to bitcast to
10778// equivalent i8 vectors. Note that when extracting a fixed-length vector
10779// from a scalable one, we might not necessarily have enough scalable
10780// elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10781if (SubVecVT.getVectorElementType() == MVT::i1) {
10782if (VecVT.getVectorMinNumElements() >= 8 &&
10783 SubVecVT.getVectorMinNumElements() >= 8) {
10784assert(OrigIdx % 8 == 0 &&"Invalid index");
10785assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10786 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10787"Unexpected mask vector lowering");
10788 OrigIdx /= 8;
10789 SubVecVT =
10790MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10791 SubVecVT.isScalableVector());
10792 VecVT =MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10793 VecVT.isScalableVector());
10794 Vec = DAG.getBitcast(VecVT, Vec);
10795 }else {
10796// We can't slide this mask vector down, indexed by its i1 elements.
10797// This poses a problem when we wish to extract a scalable vector which
10798// can't be re-expressed as a larger type. Just choose the slow path and
10799// extend to a larger type, then truncate back down.
10800// TODO: We could probably improve this when extracting certain fixed-length
10801// vectors from fixed-length vectors, where we can extract as i8 and shift the
10802// correct element right to reach the desired subvector?
10803MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10804MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10805 Vec = DAG.getNode(ISD::ZERO_EXTEND,DL, ExtVecVT, Vec);
10806 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ExtSubVecVT, Vec,
10807Op.getOperand(1));
10808SDValue SplatZero = DAG.getConstant(0,DL, ExtSubVecVT);
10809return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero,ISD::SETNE);
10810 }
10811 }
10812
10813constauto VLen = Subtarget.getRealVLen();
10814
10815// If the subvector is a fixed-length type and we don't know VLEN exactly,
10816// we cannot use subregister manipulation to simplify the codegen; we don't
10817// know which register of an LMUL group contains the specific subvector as
10818// we only know the minimum register size. Therefore we must slide the
10819// vector group down the full amount.
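// For example (illustrative), extracting a v2i32 subvector at index 6 from a
// v8i32 source with unknown VLEN becomes roughly:
//   vsetivli      zero, 2, e32, m2, ta, ma
//   vslidedown.vi v8, v8, 6
// followed by a cast-like extract of the low two elements.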
10820if (SubVecVT.isFixedLengthVector() && !VLen) {
10821MVT ContainerVT = VecVT;
10822if (VecVT.isFixedLengthVector()) {
10823 ContainerVT =getContainerForFixedLengthVector(VecVT);
10824 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10825 }
10826
10827// Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10828unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10829if (auto ShrunkVT =
10830getSmallestVTForIndex(ContainerVT, LastIdx,DL, DAG, Subtarget)) {
10831 ContainerVT = *ShrunkVT;
10832 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ContainerVT, Vec,
10833 DAG.getVectorIdxConstant(0,DL));
10834 }
10835
10836SDValueMask =
10837getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget).first;
10838// Set the vector length to only the number of elements we care about. This
10839// avoids sliding down elements we're going to discard straight away.
10840SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(),DL, XLenVT);
10841SDValue SlidedownAmt = DAG.getConstant(OrigIdx,DL, XLenVT);
10842SDValue Slidedown =
10843getVSlidedown(DAG, Subtarget,DL, ContainerVT,
10844 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10845// Now we can use a cast-like subvector extract to get the result.
10846 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, SubVecVT, Slidedown,
10847 DAG.getVectorIdxConstant(0,DL));
10848return DAG.getBitcast(Op.getValueType(), Slidedown);
10849 }
10850
10851if (VecVT.isFixedLengthVector()) {
10852 VecVT =getContainerForFixedLengthVector(VecVT);
10853 Vec =convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10854 }
10855
10856MVT ContainerSubVecVT = SubVecVT;
10857if (SubVecVT.isFixedLengthVector())
10858 ContainerSubVecVT =getContainerForFixedLengthVector(SubVecVT);
10859
10860unsigned SubRegIdx;
10861ElementCount RemIdx;
10862// extract_subvector scales the index by vscale if the subvector is scalable,
10863// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10864// we have a fixed length subvector, we need to adjust the index by 1/vscale.
10865if (SubVecVT.isFixedLengthVector()) {
10866assert(VLen);
10867unsigned Vscale = *VLen /RISCV::RVVBitsPerBlock;
10868auto Decompose =
10869RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10870 VecVT, ContainerSubVecVT, OrigIdx / Vscale,TRI);
10871 SubRegIdx = Decompose.first;
10872 RemIdx =ElementCount::getFixed((Decompose.second * Vscale) +
10873 (OrigIdx % Vscale));
10874 }else {
10875auto Decompose =
10876RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10877 VecVT, ContainerSubVecVT, OrigIdx,TRI);
10878 SubRegIdx = Decompose.first;
10879 RemIdx =ElementCount::getScalable(Decompose.second);
10880 }
10881
10882// If the Idx has been completely eliminated then this is a subvector extract
10883// which naturally aligns to a vector register. These can easily be handled
10884// using subregister manipulation. We use an extract_subvector that will
10885// resolve to an extract subreg.
10886if (RemIdx.isZero()) {
10887if (SubVecVT.isFixedLengthVector()) {
10888assert(VLen);
10889unsigned Vscale = *VLen /RISCV::RVVBitsPerBlock;
10890 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ContainerSubVecVT, Vec,
10891 DAG.getConstant(OrigIdx / Vscale,DL, XLenVT));
10892returnconvertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10893 }
10894returnOp;
10895 }
10896
10897// Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10898// was > M1 then the index would need to be a multiple of VLMAX, and so would
10899// divide exactly.
10900assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10901getLMUL(ContainerSubVecVT) ==RISCVII::VLMUL::LMUL_1);
10902
10903// If the vector type is an LMUL-group type, extract a subvector equal to the
10904// nearest full vector register type.
10905MVT InterSubVT = VecVT;
10906if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10907// If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10908// we should have successfully decomposed the extract into a subregister.
10909// We use an extract_subvector that will resolve to a subreg extract.
10910assert(SubRegIdx != RISCV::NoSubRegister);
10911 (void)SubRegIdx;
10912unsignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10913if (SubVecVT.isFixedLengthVector()) {
10914assert(VLen);
10915Idx /= *VLen /RISCV::RVVBitsPerBlock;
10916 }
10917 InterSubVT =getLMUL1VT(VecVT);
10918 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, InterSubVT, Vec,
10919 DAG.getConstant(Idx,DL, XLenVT));
10920 }
10921
10922// Slide this vector register down by the desired number of elements in order
10923// to place the desired subvector starting at element 0.
10924SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10925auto [Mask, VL] =getDefaultScalableVLOps(InterSubVT,DL, DAG, Subtarget);
10926if (SubVecVT.isFixedLengthVector())
10927 VL = DAG.getConstant(SubVecVT.getVectorNumElements(),DL, XLenVT);
10928SDValue Slidedown =
10929getVSlidedown(DAG, Subtarget,DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10930 Vec, SlidedownAmt, Mask, VL);
10931
10932// Now the vector is in the right position, extract our final subvector. This
10933// should resolve to a COPY.
10934 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, SubVecVT, Slidedown,
10935 DAG.getVectorIdxConstant(0,DL));
10936
10937// We might have bitcast from a mask type: cast back to the original type if
10938// required.
10939return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10940}
10941
10942// Widen a vector's operands to i8, then truncate its results back to the
10943// original type, typically i1. All operand and result types must be the same.
10944staticSDValuewidenVectorOpsToi8(SDValueN,constSDLoc &DL,
10945SelectionDAG &DAG) {
10946MVT VT =N.getSimpleValueType();
10947MVT WideVT = VT.changeVectorElementType(MVT::i8);
10948SmallVector<SDValue, 4> WideOps;
10949for (SDValueOp :N->ops()) {
10950assert(Op.getSimpleValueType() == VT &&
10951"Operands and result must be same type");
10952 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND,DL, WideVT,Op));
10953 }
10954
10955unsigned NumVals =N->getNumValues();
10956
10957SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10958 NumVals,N.getValueType().changeVectorElementType(MVT::i8)));
10959SDValue WideN = DAG.getNode(N.getOpcode(),DL, VTs, WideOps);
10960SmallVector<SDValue, 4> TruncVals;
10961for (unsignedI = 0;I < NumVals;I++) {
10962 TruncVals.push_back(
10963 DAG.getSetCC(DL,N->getSimpleValueType(I), WideN.getValue(I),
10964 DAG.getConstant(0,DL, WideVT),ISD::SETNE));
10965 }
10966
10967if (TruncVals.size() > 1)
10968return DAG.getMergeValues(TruncVals,DL);
10969return TruncVals.front();
10970}
10971
10972SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValueOp,
10973SelectionDAG &DAG) const{
10974SDLocDL(Op);
10975MVT VecVT =Op.getSimpleValueType();
10976
10977assert(VecVT.isScalableVector() &&
10978"vector_interleave on non-scalable vector!");
10979
10980// 1 bit element vectors need to be widened to e8
10981if (VecVT.getVectorElementType() == MVT::i1)
10982returnwidenVectorOpsToi8(Op,DL, DAG);
10983
10984// If the VT is LMUL=8, we need to split and reassemble.
10985if (VecVT.getSizeInBits().getKnownMinValue() ==
10986 (8 *RISCV::RVVBitsPerBlock)) {
10987auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10988auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10989EVT SplitVT = Op0Lo.getValueType();
10990
10991SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE,DL,
10992 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10993SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE,DL,
10994 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10995
10996SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS,DL, VecVT,
10997 ResLo.getValue(0), ResHi.getValue(0));
10998SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS,DL, VecVT, ResLo.getValue(1),
10999 ResHi.getValue(1));
11000return DAG.getMergeValues({Even, Odd},DL);
11001 }
11002
11003// Concatenate the two vectors as one vector to deinterleave
11004MVT ConcatVT =
11005MVT::getVectorVT(VecVT.getVectorElementType(),
11006 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11007SDValueConcat = DAG.getNode(ISD::CONCAT_VECTORS,DL, ConcatVT,
11008Op.getOperand(0),Op.getOperand(1));
11009
11010// We can deinterleave through vnsrl.wi if the element type is smaller than
11011// ELEN
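// For example (illustrative), deinterleaving two nxv4i16 operands treats the
// concatenated nxv8i16 value as nxv4i32 and narrows it with roughly:
//   vnsrl.wi v_even, v_src, 0    ; low halves  -> even elements
//   vnsrl.wi v_odd,  v_src, 16   ; high halves -> odd elements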
11012if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11013SDValue Even =getDeinterleaveShiftAndTrunc(DL, VecVT,Concat, 2, 0, DAG);
11014SDValue Odd =getDeinterleaveShiftAndTrunc(DL, VecVT,Concat, 2, 1, DAG);
11015return DAG.getMergeValues({Even, Odd},DL);
11016 }
11017
11018// For the indices, use the vmv.v.x of an i8 constant to fill the largest
11019// possible mask vector, then extract the required subvector. Doing this
11020// (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11021// creation to be rematerialized during register allocation to reduce
11022// register pressure if needed.
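// For example, the i8 splat of 0b01010101, reinterpreted as a mask, reads as
// 1,0,1,0,... (bit 0 of each byte first) and is exactly the even-lane mask;
// 0b10101010 likewise gives the odd-lane mask.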
11023
11024MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11025
11026SDValue EvenSplat = DAG.getConstant(0b01010101,DL, MVT::nxv8i8);
11027 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11028SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, MaskVT, EvenSplat,
11029 DAG.getVectorIdxConstant(0,DL));
11030
11031SDValue OddSplat = DAG.getConstant(0b10101010,DL, MVT::nxv8i8);
11032 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11033SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, MaskVT, OddSplat,
11034 DAG.getVectorIdxConstant(0,DL));
11035
11036// vcompress the even and odd elements into two separate vectors
11037SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS,DL, ConcatVT,Concat,
11038 EvenMask, DAG.getUNDEF(ConcatVT));
11039SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS,DL, ConcatVT,Concat,
11040 OddMask, DAG.getUNDEF(ConcatVT));
11041
11042// Extract the result half of the gather for even and odd
11043SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VecVT, EvenWide,
11044 DAG.getVectorIdxConstant(0,DL));
11045SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VecVT, OddWide,
11046 DAG.getVectorIdxConstant(0,DL));
11047
11048return DAG.getMergeValues({Even, Odd},DL);
11049}
11050
11051SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValueOp,
11052SelectionDAG &DAG) const{
11053SDLocDL(Op);
11054MVT VecVT =Op.getSimpleValueType();
11055
11056assert(VecVT.isScalableVector() &&
11057"vector_interleave on non-scalable vector!");
11058
11059// i1 vectors need to be widened to i8
11060if (VecVT.getVectorElementType() == MVT::i1)
11061returnwidenVectorOpsToi8(Op,DL, DAG);
11062
11063MVT XLenVT = Subtarget.getXLenVT();
11064SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11065
11066// If the VT is LMUL=8, we need to split and reassemble.
11067if (VecVT.getSizeInBits().getKnownMinValue() == (8 *RISCV::RVVBitsPerBlock)) {
11068auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11069auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11070EVT SplitVT = Op0Lo.getValueType();
11071
11072SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE,DL,
11073 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11074SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE,DL,
11075 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11076
11077SDValueLo = DAG.getNode(ISD::CONCAT_VECTORS,DL, VecVT,
11078 ResLo.getValue(0), ResLo.getValue(1));
11079SDValueHi = DAG.getNode(ISD::CONCAT_VECTORS,DL, VecVT,
11080 ResHi.getValue(0), ResHi.getValue(1));
11081return DAG.getMergeValues({Lo,Hi},DL);
11082 }
11083
11084SDValue Interleaved;
11085
11086// If the element type is smaller than ELEN, then we can interleave with
11087// vwaddu.vv and vwmaccu.vx
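// Roughly, the widening trick computes, per lane, EvenV[i] + (2^SEW)*OddV[i]:
// vwaddu.vv produces EvenV[i] + OddV[i] at 2*SEW, and vwmaccu.vx with an
// all-ones scalar adds (2^SEW - 1) * OddV[i]. Reinterpreted at the original
// SEW this places EvenV[i] in the even lanes and OddV[i] in the odd lanes.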
11088if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11089 Interleaved =getWideningInterleave(Op.getOperand(0),Op.getOperand(1),DL,
11090 DAG, Subtarget);
11091 }else {
11092// Otherwise, fall back to using vrgatherei16.vv
11093MVT ConcatVT =
11094MVT::getVectorVT(VecVT.getVectorElementType(),
11095 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11096SDValueConcat = DAG.getNode(ISD::CONCAT_VECTORS,DL, ConcatVT,
11097Op.getOperand(0),Op.getOperand(1));
11098
11099MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11100
11101// 0 1 2 3 4 5 6 7 ...
11102SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11103
11104// 1 1 1 1 1 1 1 1 ...
11105SDValue Ones = DAG.getSplatVector(IdxVT,DL, DAG.getConstant(1,DL, XLenVT));
11106
11107// 1 0 1 0 1 0 1 0 ...
11108SDValue OddMask = DAG.getNode(ISD::AND,DL, IdxVT, StepVec, Ones);
11109 OddMask = DAG.getSetCC(
11110DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11111 DAG.getSplatVector(IdxVT,DL, DAG.getConstant(0,DL, XLenVT)),
11112ISD::CondCode::SETNE);
11113
11114SDValue VLMax = DAG.getSplatVector(IdxVT,DL,computeVLMax(VecVT,DL, DAG));
11115
11116// Build up the index vector for interleaving the concatenated vector
11117// 0 0 1 1 2 2 3 3 ...
11118SDValueIdx = DAG.getNode(ISD::SRL,DL, IdxVT, StepVec, Ones);
11119// 0 n 1 n+1 2 n+2 3 n+3 ...
11120Idx =
11121 DAG.getNode(RISCVISD::ADD_VL,DL, IdxVT,Idx, VLMax,Idx, OddMask, VL);
11122
11123// Then perform the interleave
11124// v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11125SDValue TrueMask =getAllOnesMask(IdxVT, VL,DL, DAG);
11126 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL,DL, ConcatVT,
11127Concat,Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11128 }
11129
11130// Extract the two halves from the interleaved result
11131SDValueLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, VecVT, Interleaved,
11132 DAG.getVectorIdxConstant(0,DL));
11133SDValueHi = DAG.getNode(
11134ISD::EXTRACT_SUBVECTOR,DL, VecVT, Interleaved,
11135 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(),DL));
11136
11137return DAG.getMergeValues({Lo,Hi},DL);
11138}
11139
11140// Lower step_vector to the vid instruction. Any non-identity step value must
11141// be accounted for by manual expansion.
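// For example (illustrative), a step of 4 becomes vid.v followed by a shift
// left by 2 (vsll), while a step of 3 becomes vid.v followed by a multiply
// (vmul) with the splatted constant 3.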
11142SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValueOp,
11143SelectionDAG &DAG) const{
11144SDLocDL(Op);
11145MVT VT =Op.getSimpleValueType();
11146assert(VT.isScalableVector() &&"Expected scalable vector");
11147MVT XLenVT = Subtarget.getXLenVT();
11148auto [Mask, VL] =getDefaultScalableVLOps(VT,DL, DAG, Subtarget);
11149SDValue StepVec = DAG.getNode(RISCVISD::VID_VL,DL, VT, Mask, VL);
11150uint64_t StepValImm =Op.getConstantOperandVal(0);
11151if (StepValImm != 1) {
11152if (isPowerOf2_64(StepValImm)) {
11153SDValue StepVal =
11154 DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, DAG.getUNDEF(VT),
11155 DAG.getConstant(Log2_64(StepValImm),DL, XLenVT), VL);
11156 StepVec = DAG.getNode(ISD::SHL,DL, VT, StepVec, StepVal);
11157 }else {
11158SDValue StepVal =lowerScalarSplat(
11159SDValue(), DAG.getConstant(StepValImm,DL, VT.getVectorElementType()),
11160 VL, VT,DL, DAG, Subtarget);
11161 StepVec = DAG.getNode(ISD::MUL,DL, VT, StepVec, StepVal);
11162 }
11163 }
11164return StepVec;
11165}
11166
11167// Implement vector_reverse using vrgather.vv with indices determined by
11168// subtracting the id of each element from (VLMAX-1). This will convert
11169// the indices like so:
11170// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11171// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11172SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValueOp,
11173SelectionDAG &DAG) const{
11174SDLocDL(Op);
11175MVT VecVT =Op.getSimpleValueType();
11176if (VecVT.getVectorElementType() == MVT::i1) {
11177MVT WidenVT =MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11178SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND,DL, WidenVT,Op.getOperand(0));
11179SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE,DL, WidenVT, Op1);
11180return DAG.getSetCC(DL, VecVT, Op2,
11181 DAG.getConstant(0,DL, Op2.getValueType()),ISD::SETNE);
11182 }
11183
11184MVT ContainerVT = VecVT;
11185SDValue Vec =Op.getOperand(0);
11186if (VecVT.isFixedLengthVector()) {
11187 ContainerVT =getContainerForFixedLengthVector(VecVT);
11188 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11189 }
11190
11191MVT XLenVT = Subtarget.getXLenVT();
11192auto [Mask, VL] =getDefaultVLOps(VecVT, ContainerVT,DL, DAG, Subtarget);
11193
11194// On some uarchs vrgather.vv will read from every input register for each
11195// output register, regardless of the indices. However to reverse a vector
11196// each output register only needs to read from one register. So decompose it
11197// into LMUL * M1 vrgather.vvs to get O(LMUL) performance instead of
11198// O(LMUL^2).
11199//
11200// vsetvli a1, zero, e64, m4, ta, ma
11201// vrgatherei16.vv v12, v8, v16
11202// ->
11203// vsetvli a1, zero, e64, m1, ta, ma
11204// vrgather.vv v15, v8, v16
11205// vrgather.vv v14, v9, v16
11206// vrgather.vv v13, v10, v16
11207// vrgather.vv v12, v11, v16
11208if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11209 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11210auto [Lo,Hi] = DAG.SplitVector(Vec,DL);
11211Lo = DAG.getNode(ISD::VECTOR_REVERSE,DL,Lo.getSimpleValueType(),Lo);
11212Hi = DAG.getNode(ISD::VECTOR_REVERSE,DL,Hi.getSimpleValueType(),Hi);
11213SDValueConcat = DAG.getNode(ISD::CONCAT_VECTORS,DL, ContainerVT,Hi,Lo);
11214
11215// Fixed length vectors might not fit exactly into their container, and so
11216// leave a gap in the front of the vector after being reversed. Slide this
11217// away.
11218//
11219// x x x x 3 2 1 0 <- v4i16 @ vlen=128
11220// 0 1 2 3 x x x x <- reverse
11221// x x x x 0 1 2 3 <- vslidedown.vx
11222if (VecVT.isFixedLengthVector()) {
11223SDValueOffset = DAG.getNode(
11224ISD::SUB,DL, XLenVT,
11225 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11226 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11227Concat =
11228getVSlidedown(DAG, Subtarget,DL, ContainerVT,
11229 DAG.getUNDEF(ContainerVT),Concat,Offset, Mask, VL);
11230Concat =convertFromScalableVector(VecVT,Concat, DAG, Subtarget);
11231 }
11232returnConcat;
11233 }
11234
11235unsigned EltSize = ContainerVT.getScalarSizeInBits();
11236unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11237unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11238unsigned MaxVLMAX =
11239 VecVT.isFixedLengthVector()
11240 ? VecVT.getVectorNumElements()
11241 :RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11242
11243unsigned GatherOpc =RISCVISD::VRGATHER_VV_VL;
11244MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11245
11246// If this is SEW=8 and VLMAX is potentially more than 256, we need
11247// to use vrgatherei16.vv.
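// (With SEW=8 the vrgather.vv indices are themselves only 8 bits wide and can
// address at most 256 source elements, so a larger VLMAX needs 16-bit
// indices.)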
11248if (MaxVLMAX > 256 && EltSize == 8) {
11249// If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11250// Reverse each half, then reassemble them in reverse order.
11251// NOTE: It's also possible that after splitting that VLMAX no longer
11252// requires vrgatherei16.vv.
11253if (MinSize == (8 *RISCV::RVVBitsPerBlock)) {
11254auto [Lo,Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11255auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11256Lo = DAG.getNode(ISD::VECTOR_REVERSE,DL, LoVT,Lo);
11257Hi = DAG.getNode(ISD::VECTOR_REVERSE,DL, HiVT,Hi);
11258// Reassemble the low and high pieces reversed.
11259// FIXME: This is a CONCAT_VECTORS.
11260SDValue Res =
11261 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VecVT, DAG.getUNDEF(VecVT),Hi,
11262 DAG.getVectorIdxConstant(0,DL));
11263return DAG.getNode(
11264ISD::INSERT_SUBVECTOR,DL, VecVT, Res,Lo,
11265 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(),DL));
11266 }
11267
11268// Just promote the int type to i16 which will double the LMUL.
11269 IntVT =MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11270 GatherOpc =RISCVISD::VRGATHEREI16_VV_VL;
11271 }
11272
11273// At LMUL > 1, do the index computation in 16 bits to reduce register
11274// pressure.
11275if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11276 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11277assert(isUInt<16>(MaxVLMAX - 1));// Largest VLMAX is 65536 @ zvl65536b
11278 GatherOpc =RISCVISD::VRGATHEREI16_VV_VL;
11279 IntVT = IntVT.changeVectorElementType(MVT::i16);
11280 }
11281
11282// Calculate VLMAX-1 for the desired SEW.
11283SDValue VLMinus1 = DAG.getNode(
11284ISD::SUB,DL, XLenVT,
11285 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11286 DAG.getConstant(1,DL, XLenVT));
11287
11288// Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11289bool IsRV32E64 =
11290 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11291SDValue SplatVL;
11292if (!IsRV32E64)
11293 SplatVL = DAG.getSplatVector(IntVT,DL, VLMinus1);
11294else
11295 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, IntVT, DAG.getUNDEF(IntVT),
11296 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11297
11298SDValue VID = DAG.getNode(RISCVISD::VID_VL,DL, IntVT, Mask, VL);
11299SDValue Indices = DAG.getNode(RISCVISD::SUB_VL,DL, IntVT, SplatVL, VID,
11300 DAG.getUNDEF(IntVT), Mask, VL);
11301
11302SDValue Gather = DAG.getNode(GatherOpc,DL, ContainerVT, Vec, Indices,
11303 DAG.getUNDEF(ContainerVT), Mask, VL);
11304if (VecVT.isFixedLengthVector())
11305 Gather =convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11306return Gather;
11307}
11308
11309SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValueOp,
11310SelectionDAG &DAG) const{
11311SDLocDL(Op);
11312SDValue V1 =Op.getOperand(0);
11313SDValueV2 =Op.getOperand(1);
11314MVT XLenVT = Subtarget.getXLenVT();
11315MVT VecVT =Op.getSimpleValueType();
11316
11317SDValue VLMax =computeVLMax(VecVT,DL, DAG);
11318
11319 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11320SDValue DownOffset, UpOffset;
11321if (ImmValue >= 0) {
11322// The operand is a TargetConstant, we need to rebuild it as a regular
11323// constant.
11324 DownOffset = DAG.getConstant(ImmValue,DL, XLenVT);
11325 UpOffset = DAG.getNode(ISD::SUB,DL, XLenVT, VLMax, DownOffset);
11326 }else {
11327// The operand is a TargetConstant, we need to rebuild it as a regular
11328// constant rather than negating the original operand.
11329 UpOffset = DAG.getConstant(-ImmValue,DL, XLenVT);
11330 DownOffset = DAG.getNode(ISD::SUB,DL, XLenVT, VLMax, UpOffset);
11331 }
11332
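// For example (illustrative), a splice with immediate 2 slides V1 down by 2
// (keeping VLMAX-2 elements) and then slides V2 up by VLMAX-2 so that its
// first two elements fill the tail of the result.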
11333SDValue TrueMask =getAllOnesMask(VecVT, VLMax,DL, DAG);
11334
11335SDValue SlideDown =
11336getVSlidedown(DAG, Subtarget,DL, VecVT, DAG.getUNDEF(VecVT), V1,
11337 DownOffset, TrueMask, UpOffset);
11338returngetVSlideup(DAG, Subtarget,DL, VecVT, SlideDown, V2, UpOffset,
11339 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11340RISCVII::TAIL_AGNOSTIC);
11341}
11342
11343SDValue
11344RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValueOp,
11345SelectionDAG &DAG) const{
11346SDLocDL(Op);
11347auto *Load = cast<LoadSDNode>(Op);
11348
11349assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11350Load->getMemoryVT(),
11351 *Load->getMemOperand()) &&
11352"Expecting a correctly-aligned load");
11353
11354MVT VT =Op.getSimpleValueType();
11355MVT XLenVT = Subtarget.getXLenVT();
11356MVT ContainerVT =getContainerForFixedLengthVector(VT);
11357
11358// If we know the exact VLEN and our fixed length vector completely fills
11359// the container, use a whole register load instead.
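// For example (illustrative), with VLEN known to be exactly 128 a v4i32 load
// fills its LMUL=1 container and can be emitted as a whole register load
// (roughly vl1re32.v), avoiding the vsetvli/vle32.v sequence.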
11360constauto [MinVLMAX, MaxVLMAX] =
11361RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11362if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11363getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11364MachineMemOperand *MMO =Load->getMemOperand();
11365SDValue NewLoad =
11366 DAG.getLoad(ContainerVT,DL,Load->getChain(),Load->getBasePtr(),
11367 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11368 MMO->getAAInfo(), MMO->getRanges());
11369SDValueResult =convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11370return DAG.getMergeValues({Result, NewLoad.getValue(1)},DL);
11371 }
11372
11373SDValue VL = DAG.getConstant(VT.getVectorNumElements(),DL, XLenVT);
11374
11375bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11376SDValue IntID = DAG.getTargetConstant(
11377 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle,DL, XLenVT);
11378SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11379if (!IsMaskOp)
11380 Ops.push_back(DAG.getUNDEF(ContainerVT));
11381 Ops.push_back(Load->getBasePtr());
11382 Ops.push_back(VL);
11383SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11384SDValue NewLoad =
11385 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,DL, VTs, Ops,
11386Load->getMemoryVT(),Load->getMemOperand());
11387
11388SDValueResult =convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11389return DAG.getMergeValues({Result, NewLoad.getValue(1)},DL);
11390}
11391
11392SDValue
11393RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValueOp,
11394SelectionDAG &DAG) const{
11395SDLocDL(Op);
11396auto *Store = cast<StoreSDNode>(Op);
11397
11398assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11399Store->getMemoryVT(),
11400 *Store->getMemOperand()) &&
11401"Expecting a correctly-aligned store");
11402
11403SDValue StoreVal =Store->getValue();
11404MVT VT = StoreVal.getSimpleValueType();
11405MVT XLenVT = Subtarget.getXLenVT();
11406
11407// If the size is less than a byte, we need to pad with zeros to make a byte.
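// For example (illustrative), a v4i1 store is widened to v8i1 by inserting the
// value into a zero vector, so the mask store emitted below (vsm.v) writes a
// single byte.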
11408if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11409 VT = MVT::v8i1;
11410 StoreVal =
11411 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VT, DAG.getConstant(0,DL, VT),
11412 StoreVal, DAG.getVectorIdxConstant(0,DL));
11413 }
11414
11415MVT ContainerVT =getContainerForFixedLengthVector(VT);
11416
11417SDValue NewValue =
11418convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11419
11420// If we know the exact VLEN and our fixed length vector completely fills
11421// the container, use a whole register store instead.
11422constauto [MinVLMAX, MaxVLMAX] =
11423RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11424if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11425getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11426MachineMemOperand *MMO =Store->getMemOperand();
11427return DAG.getStore(Store->getChain(),DL, NewValue,Store->getBasePtr(),
11428 MMO->getPointerInfo(), MMO->getBaseAlign(),
11429 MMO->getFlags(), MMO->getAAInfo());
11430 }
11431
11432SDValue VL = DAG.getConstant(VT.getVectorNumElements(),DL, XLenVT);
11433
11434bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11435SDValue IntID = DAG.getTargetConstant(
11436 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse,DL, XLenVT);
11437return DAG.getMemIntrinsicNode(
11438ISD::INTRINSIC_VOID,DL, DAG.getVTList(MVT::Other),
11439 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11440Store->getMemoryVT(),Store->getMemOperand());
11441}
11442
11443SDValue RISCVTargetLowering::lowerMaskedLoad(SDValueOp,
11444SelectionDAG &DAG) const{
11445SDLocDL(Op);
11446MVT VT =Op.getSimpleValueType();
11447
11448constauto *MemSD = cast<MemSDNode>(Op);
11449EVT MemVT = MemSD->getMemoryVT();
11450MachineMemOperand *MMO = MemSD->getMemOperand();
11451SDValue Chain = MemSD->getChain();
11452SDValueBasePtr = MemSD->getBasePtr();
11453
11454SDValueMask, PassThru, VL;
11455bool IsExpandingLoad =false;
11456if (constauto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11457Mask = VPLoad->getMask();
11458 PassThru = DAG.getUNDEF(VT);
11459 VL = VPLoad->getVectorLength();
11460 }else {
11461constauto *MLoad = cast<MaskedLoadSDNode>(Op);
11462Mask = MLoad->getMask();
11463 PassThru = MLoad->getPassThru();
11464 IsExpandingLoad = MLoad->isExpandingLoad();
11465 }
11466
11467bool IsUnmasked =ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11468
11469MVT XLenVT = Subtarget.getXLenVT();
11470
11471MVT ContainerVT = VT;
11472if (VT.isFixedLengthVector()) {
11473 ContainerVT =getContainerForFixedLengthVector(VT);
11474 PassThru =convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11475if (!IsUnmasked) {
11476MVT MaskVT =getMaskTypeFor(ContainerVT);
11477Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11478 }
11479 }
11480
11481if (!VL)
11482 VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
11483
11484SDValue ExpandingVL;
11485if (!IsUnmasked && IsExpandingLoad) {
11486 ExpandingVL = VL;
11487 VL =
11488 DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, Mask,
11489getAllOnesMask(Mask.getSimpleValueType(), VL,DL, DAG), VL);
11490 }
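// For an expanding load the VL just computed covers only the active lanes, so
// the vle below reads vcpop(mask) contiguous elements; the viota/vrgather
// sequence further down then spreads them to the masked positions. For example
// (illustrative), with mask 1,0,1,1 three elements are loaded, viota.m yields
// indices 0,1,1,2, and the masked vrgather places them in lanes 0, 2 and 3,
// leaving lane 1 to the passthru.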
11491
11492unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11493 : Intrinsic::riscv_vle_mask;
11494SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID,DL, XLenVT)};
11495if (IntID == Intrinsic::riscv_vle)
11496 Ops.push_back(DAG.getUNDEF(ContainerVT));
11497else
11498 Ops.push_back(PassThru);
11499 Ops.push_back(BasePtr);
11500if (IntID == Intrinsic::riscv_vle_mask)
11501 Ops.push_back(Mask);
11502 Ops.push_back(VL);
11503if (IntID == Intrinsic::riscv_vle_mask)
11504 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC,DL, XLenVT));
11505
11506SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11507
11508SDValueResult =
11509 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,DL, VTs, Ops, MemVT, MMO);
11510 Chain =Result.getValue(1);
11511if (ExpandingVL) {
11512MVT IndexVT = ContainerVT;
11513if (ContainerVT.isFloatingPoint())
11514 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11515
11516MVT IndexEltVT = IndexVT.getVectorElementType();
11517bool UseVRGATHEREI16 =false;
11518// If the index vector is an i8 vector and the element count exceeds 256, we
11519// should change the element type of the index vector to i16 to avoid
11520// overflow.
11521if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11522// FIXME: We need to do vector splitting manually for LMUL=8 cases.
11523assert(getLMUL(IndexVT) !=RISCVII::LMUL_8);
11524 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11525 UseVRGATHEREI16 =true;
11526 }
11527
11528SDValue Iota =
11529 DAG.getNode(ISD::INTRINSIC_WO_CHAIN,DL, IndexVT,
11530 DAG.getConstant(Intrinsic::riscv_viota,DL, XLenVT),
11531 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11532Result =
11533 DAG.getNode(UseVRGATHEREI16 ?RISCVISD::VRGATHEREI16_VV_VL
11534 :RISCVISD::VRGATHER_VV_VL,
11535DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11536 }
11537
11538if (VT.isFixedLengthVector())
11539Result =convertFromScalableVector(VT, Result, DAG, Subtarget);
11540
11541return DAG.getMergeValues({Result, Chain},DL);
11542}
11543
11544SDValue RISCVTargetLowering::lowerMaskedStore(SDValueOp,
11545SelectionDAG &DAG) const{
11546SDLocDL(Op);
11547
11548constauto *MemSD = cast<MemSDNode>(Op);
11549EVT MemVT = MemSD->getMemoryVT();
11550MachineMemOperand *MMO = MemSD->getMemOperand();
11551SDValue Chain = MemSD->getChain();
11552SDValueBasePtr = MemSD->getBasePtr();
11553SDValue Val,Mask, VL;
11554
11555bool IsCompressingStore =false;
11556if (constauto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11557 Val = VPStore->getValue();
11558Mask = VPStore->getMask();
11559 VL = VPStore->getVectorLength();
11560 }else {
11561constauto *MStore = cast<MaskedStoreSDNode>(Op);
11562 Val = MStore->getValue();
11563Mask = MStore->getMask();
11564 IsCompressingStore = MStore->isCompressingStore();
11565 }
11566
11567bool IsUnmasked =
11568ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11569
11570MVT VT = Val.getSimpleValueType();
11571MVT XLenVT = Subtarget.getXLenVT();
11572
11573MVT ContainerVT = VT;
11574if (VT.isFixedLengthVector()) {
11575 ContainerVT =getContainerForFixedLengthVector(VT);
11576
11577 Val =convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11578if (!IsUnmasked || IsCompressingStore) {
11579MVT MaskVT =getMaskTypeFor(ContainerVT);
11580Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11581 }
11582 }
11583
11584if (!VL)
11585 VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
11586
11587if (IsCompressingStore) {
11588 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN,DL, ContainerVT,
11589 DAG.getConstant(Intrinsic::riscv_vcompress,DL, XLenVT),
11590 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11591 VL =
11592 DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, Mask,
11593getAllOnesMask(Mask.getSimpleValueType(), VL,DL, DAG), VL);
11594 }
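  // For a compressing store, vcompress packs the elements selected by Mask to
  // the front of Val, so the store below can be emitted unmasked with its VL
  // reduced to vcpop(Mask), writing only the active elements contiguously.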
11595
11596unsigned IntID =
11597 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11598SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID,DL, XLenVT)};
11599 Ops.push_back(Val);
11600 Ops.push_back(BasePtr);
11601if (!IsUnmasked)
11602 Ops.push_back(Mask);
11603 Ops.push_back(VL);
11604
11605return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,DL,
11606 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11607}
11608
11609SDValue RISCVTargetLowering::lowerVectorCompress(SDValueOp,
11610SelectionDAG &DAG) const{
11611SDLocDL(Op);
11612SDValue Val =Op.getOperand(0);
11613SDValueMask =Op.getOperand(1);
11614SDValue Passthru =Op.getOperand(2);
11615
11616MVT VT = Val.getSimpleValueType();
11617MVT XLenVT = Subtarget.getXLenVT();
11618MVT ContainerVT = VT;
11619if (VT.isFixedLengthVector()) {
11620 ContainerVT =getContainerForFixedLengthVector(VT);
11621MVT MaskVT =getMaskTypeFor(ContainerVT);
11622 Val =convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11623Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11624 Passthru =convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11625 }
11626
11627SDValue VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
11628SDValue Res =
11629 DAG.getNode(ISD::INTRINSIC_WO_CHAIN,DL, ContainerVT,
11630 DAG.getConstant(Intrinsic::riscv_vcompress,DL, XLenVT),
11631 Passthru, Val, Mask, VL);
11632
11633if (VT.isFixedLengthVector())
11634 Res =convertFromScalableVector(VT, Res, DAG, Subtarget);
11635
11636return Res;
11637}
11638
11639SDValue
11640RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValueOp,
11641SelectionDAG &DAG) const{
11642MVT InVT =Op.getOperand(0).getSimpleValueType();
11643MVT ContainerVT =getContainerForFixedLengthVector(InVT);
11644
11645MVT VT =Op.getSimpleValueType();
11646
11647SDValue Op1 =
11648convertToScalableVector(ContainerVT,Op.getOperand(0), DAG, Subtarget);
11649SDValue Op2 =
11650convertToScalableVector(ContainerVT,Op.getOperand(1), DAG, Subtarget);
11651
11652SDLocDL(Op);
11653auto [Mask, VL] =getDefaultVLOps(VT.getVectorNumElements(), ContainerVT,DL,
11654 DAG, Subtarget);
11655MVT MaskVT =getMaskTypeFor(ContainerVT);
11656
11657SDValueCmp =
11658 DAG.getNode(RISCVISD::SETCC_VL,DL, MaskVT,
11659 {Op1, Op2,Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11660
11661returnconvertFromScalableVector(VT, Cmp, DAG, Subtarget);
11662}
11663
11664SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValueOp,
11665SelectionDAG &DAG) const{
11666unsigned Opc =Op.getOpcode();
11667SDLocDL(Op);
11668SDValue Chain =Op.getOperand(0);
11669SDValue Op1 =Op.getOperand(1);
11670SDValue Op2 =Op.getOperand(2);
11671SDValueCC =Op.getOperand(3);
11672ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11673MVT VT =Op.getSimpleValueType();
11674MVT InVT = Op1.getSimpleValueType();
11675
11676// RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
11677// condition codes.
11678if (Opc ==ISD::STRICT_FSETCCS) {
11679// Expand strict_fsetccs(x, y, oeq) to
11680// (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
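    // Both OLE compares are signaling, so a NaN operand still raises the
    // invalid exception as STRICT_FSETCCS requires, while the AND of the two
    // results is true exactly when the operands are ordered and equal.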
11681SDVTList VTList =Op->getVTList();
11682if (CCVal ==ISD::SETEQ || CCVal ==ISD::SETOEQ) {
11683SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11684SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS,DL, VTList, Chain, Op1,
11685 Op2, OLECCVal);
11686SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS,DL, VTList, Chain, Op2,
11687 Op1, OLECCVal);
11688SDValue OutChain = DAG.getNode(ISD::TokenFactor,DL, MVT::Other,
11689 Tmp1.getValue(1), Tmp2.getValue(1));
11690// Tmp1 and Tmp2 might be the same node.
11691if (Tmp1 != Tmp2)
11692 Tmp1 = DAG.getNode(ISD::AND,DL, VT, Tmp1, Tmp2);
11693return DAG.getMergeValues({Tmp1, OutChain},DL);
11694 }
11695
11696// Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11697if (CCVal ==ISD::SETNE || CCVal ==ISD::SETUNE) {
11698SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11699SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS,DL, VTList, Chain, Op1,
11700 Op2, OEQCCVal);
11701SDValue Res = DAG.getNOT(DL, OEQ, VT);
11702return DAG.getMergeValues({Res, OEQ.getValue(1)},DL);
11703 }
11704 }
11705
11706MVT ContainerInVT = InVT;
11707if (InVT.isFixedLengthVector()) {
11708 ContainerInVT =getContainerForFixedLengthVector(InVT);
11709 Op1 =convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11710 Op2 =convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11711 }
11712MVT MaskVT =getMaskTypeFor(ContainerInVT);
11713
11714auto [Mask, VL] =getDefaultVLOps(InVT, ContainerInVT,DL, DAG, Subtarget);
11715
11716SDValue Res;
11717if (Opc ==ISD::STRICT_FSETCC &&
11718 (CCVal ==ISD::SETLT || CCVal ==ISD::SETOLT || CCVal ==ISD::SETLE ||
11719 CCVal ==ISD::SETOLE)) {
11720// VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11721// is only active when both input elements are ordered.
11722SDValue True =getAllOnesMask(ContainerInVT, VL,DL, DAG);
11723SDValue OrderMask1 = DAG.getNode(
11724RISCVISD::STRICT_FSETCC_VL,DL, DAG.getVTList(MaskVT, MVT::Other),
11725 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11726 True, VL});
11727SDValue OrderMask2 = DAG.getNode(
11728RISCVISD::STRICT_FSETCC_VL,DL, DAG.getVTList(MaskVT, MVT::Other),
11729 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11730 True, VL});
11731Mask =
11732 DAG.getNode(RISCVISD::VMAND_VL,DL, MaskVT, OrderMask1, OrderMask2, VL);
11733// Use Mask as the passthru operand to let the result be 0 if either of the
11734// inputs is unordered.
11735 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL,DL,
11736 DAG.getVTList(MaskVT, MVT::Other),
11737 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11738 }else {
11739unsigned RVVOpc = Opc ==ISD::STRICT_FSETCC ?RISCVISD::STRICT_FSETCC_VL
11740 :RISCVISD::STRICT_FSETCCS_VL;
11741 Res = DAG.getNode(RVVOpc,DL, DAG.getVTList(MaskVT, MVT::Other),
11742 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11743 }
11744
11745if (VT.isFixedLengthVector()) {
11746SDValue SubVec =convertFromScalableVector(VT, Res, DAG, Subtarget);
11747return DAG.getMergeValues({SubVec, Res.getValue(1)},DL);
11748 }
11749return Res;
11750}
11751
11752// Lower vector ABS to smax(X, sub(0, X)).
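// Concretely, a zero splat is formed, NegX = 0 - X is computed with a
// VL-predicated subtract, and the result is smax(X, NegX). For example, a
// lane holding -5 becomes smax(-5, 5) = 5; INT_MIN stays INT_MIN because
// 0 - INT_MIN wraps back to INT_MIN.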
11753SDValue RISCVTargetLowering::lowerABS(SDValueOp,SelectionDAG &DAG) const{
11754SDLocDL(Op);
11755MVT VT =Op.getSimpleValueType();
11756SDValueX =Op.getOperand(0);
11757
11758assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11759"Unexpected type for ISD::ABS");
11760
11761MVT ContainerVT = VT;
11762if (VT.isFixedLengthVector()) {
11763 ContainerVT =getContainerForFixedLengthVector(VT);
11764X =convertToScalableVector(ContainerVT,X, DAG, Subtarget);
11765 }
11766
11767SDValueMask, VL;
11768if (Op->getOpcode() == ISD::VP_ABS) {
11769Mask =Op->getOperand(1);
11770if (VT.isFixedLengthVector())
11771Mask =convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11772 Subtarget);
11773 VL =Op->getOperand(2);
11774 }else
11775 std::tie(Mask, VL) =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
11776
11777SDValue SplatZero = DAG.getNode(
11778RISCVISD::VMV_V_X_VL,DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11779 DAG.getConstant(0,DL, Subtarget.getXLenVT()), VL);
11780SDValue NegX = DAG.getNode(RISCVISD::SUB_VL,DL, ContainerVT, SplatZero,X,
11781 DAG.getUNDEF(ContainerVT), Mask, VL);
11782SDValueMax = DAG.getNode(RISCVISD::SMAX_VL,DL, ContainerVT,X, NegX,
11783 DAG.getUNDEF(ContainerVT), Mask, VL);
11784
11785if (VT.isFixedLengthVector())
11786Max =convertFromScalableVector(VT, Max, DAG, Subtarget);
11787returnMax;
11788}
11789
11790SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11791SDValueOp,SelectionDAG &DAG) const{
11792SDLocDL(Op);
11793MVT VT =Op.getSimpleValueType();
11794SDValue Mag =Op.getOperand(0);
11795SDValue Sign =Op.getOperand(1);
11796assert(Mag.getValueType() == Sign.getValueType() &&
11797"Can only handle COPYSIGN with matching types.");
11798
11799MVT ContainerVT =getContainerForFixedLengthVector(VT);
11800 Mag =convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11801 Sign =convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11802
11803auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
11804
11805SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL,DL, ContainerVT, Mag,
11806 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11807
11808returnconvertFromScalableVector(VT, CopySign, DAG, Subtarget);
11809}
11810
11811SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11812SDValueOp,SelectionDAG &DAG) const{
11813MVT VT =Op.getSimpleValueType();
11814MVT ContainerVT =getContainerForFixedLengthVector(VT);
11815
11816MVT I1ContainerVT =
11817MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11818
11819SDValueCC =
11820convertToScalableVector(I1ContainerVT,Op.getOperand(0), DAG, Subtarget);
11821SDValue Op1 =
11822convertToScalableVector(ContainerVT,Op.getOperand(1), DAG, Subtarget);
11823SDValue Op2 =
11824convertToScalableVector(ContainerVT,Op.getOperand(2), DAG, Subtarget);
11825
11826SDLocDL(Op);
11827SDValue VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
11828
11829SDValueSelect = DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT,CC, Op1,
11830 Op2, DAG.getUNDEF(ContainerVT), VL);
11831
11832returnconvertFromScalableVector(VT,Select, DAG, Subtarget);
11833}
11834
11835SDValue RISCVTargetLowering::lowerToScalableOp(SDValueOp,
11836SelectionDAG &DAG) const{
11837unsigned NewOpc =getRISCVVLOp(Op);
11838bool HasPassthruOp =hasPassthruOp(NewOpc);
11839bool HasMask =hasMaskOp(NewOpc);
11840
11841MVT VT =Op.getSimpleValueType();
11842MVT ContainerVT =getContainerForFixedLengthVector(VT);
11843
11844// Create list of operands by converting existing ones to scalable types.
11845SmallVector<SDValue, 6> Ops;
11846for (constSDValue &V :Op->op_values()) {
11847assert(!isa<VTSDNode>(V) &&"Unexpected VTSDNode node!");
11848
11849// Pass through non-vector operands.
11850if (!V.getValueType().isVector()) {
11851 Ops.push_back(V);
11852continue;
11853 }
11854
11855// "cast" fixed length vector to a scalable vector.
11856assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11857"Only fixed length vectors are supported!");
11858 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11859 }
11860
11861SDLocDL(Op);
11862auto [Mask, VL] =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget);
11863if (HasPassthruOp)
11864 Ops.push_back(DAG.getUNDEF(ContainerVT));
11865if (HasMask)
11866 Ops.push_back(Mask);
11867 Ops.push_back(VL);
11868
11869// StrictFP operations have two result values. Their lowered result should
11870// have the same result count.
11871if (Op->isStrictFPOpcode()) {
11872SDValue ScalableRes =
11873 DAG.getNode(NewOpc,DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11874Op->getFlags());
11875SDValue SubVec =convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11876return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)},DL);
11877 }
11878
11879SDValue ScalableRes =
11880 DAG.getNode(NewOpc,DL, ContainerVT, Ops,Op->getFlags());
11881returnconvertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11882}
11883
11884// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11885// * Operands of each node are assumed to be in the same order.
11886// * The EVL operand is promoted from i32 to i64 on RV64.
11887// * Fixed-length vectors are converted to their scalable-vector container
11888// types.
11889SDValue RISCVTargetLowering::lowerVPOp(SDValueOp,SelectionDAG &DAG) const{
11890unsigned RISCVISDOpc =getRISCVVLOp(Op);
11891bool HasPassthruOp =hasPassthruOp(RISCVISDOpc);
11892
11893SDLocDL(Op);
11894MVT VT =Op.getSimpleValueType();
11895SmallVector<SDValue, 4> Ops;
11896
11897MVT ContainerVT = VT;
11898if (VT.isFixedLengthVector())
11899 ContainerVT =getContainerForFixedLengthVector(VT);
11900
11901for (constauto &OpIdx :enumerate(Op->ops())) {
11902SDValueV = OpIdx.value();
11903assert(!isa<VTSDNode>(V) &&"Unexpected VTSDNode node!");
11904// Add a dummy passthru value before the mask, or before the EVL if there
11905// isn't a mask.
11906if (HasPassthruOp) {
11907auto MaskIdx =ISD::getVPMaskIdx(Op.getOpcode());
11908if (MaskIdx) {
11909if (*MaskIdx == OpIdx.index())
11910 Ops.push_back(DAG.getUNDEF(ContainerVT));
11911 }elseif (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11912 OpIdx.index()) {
11913if (Op.getOpcode() == ISD::VP_MERGE) {
11914// For VP_MERGE, copy the false operand instead of an undef value.
11915 Ops.push_back(Ops.back());
11916 }else {
11917assert(Op.getOpcode() == ISD::VP_SELECT);
11918// For VP_SELECT, add an undef value.
11919 Ops.push_back(DAG.getUNDEF(ContainerVT));
11920 }
11921 }
11922 }
11923// VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11924if (RISCVISDOpc ==RISCVISD::VFCVT_RM_X_F_VL &&
11925ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11926 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN,DL,
11927 Subtarget.getXLenVT()));
11928// Pass through operands which aren't fixed-length vectors.
11929if (!V.getValueType().isFixedLengthVector()) {
11930 Ops.push_back(V);
11931continue;
11932 }
11933// "cast" fixed length vector to a scalable vector.
11934MVT OpVT =V.getSimpleValueType();
11935MVT ContainerVT =getContainerForFixedLengthVector(OpVT);
11936assert(useRVVForFixedLengthVectorVT(OpVT) &&
11937"Only fixed length vectors are supported!");
11938 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11939 }
11940
11941if (!VT.isFixedLengthVector())
11942return DAG.getNode(RISCVISDOpc,DL, VT, Ops,Op->getFlags());
11943
11944SDValue VPOp = DAG.getNode(RISCVISDOpc,DL, ContainerVT, Ops,Op->getFlags());
11945
11946returnconvertFromScalableVector(VT, VPOp, DAG, Subtarget);
11947}
11948
11949SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValueOp,
11950SelectionDAG &DAG) const{
11951SDLocDL(Op);
11952MVT VT =Op.getSimpleValueType();
11953
11954SDValue Src =Op.getOperand(0);
11955// NOTE: Mask is dropped.
11956SDValue VL =Op.getOperand(2);
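  // A vp.zext/vp.sext from i1 is lowered as a merge: lanes where the source
  // bit is set receive a splat of 1 (zero extend) or -1 (sign extend), and all
  // other lanes receive 0.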
11957
11958MVT ContainerVT = VT;
11959if (VT.isFixedLengthVector()) {
11960 ContainerVT =getContainerForFixedLengthVector(VT);
11961MVT SrcVT =MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11962 Src =convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11963 }
11964
11965MVT XLenVT = Subtarget.getXLenVT();
11966SDValueZero = DAG.getConstant(0,DL, XLenVT);
11967SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
11968 DAG.getUNDEF(ContainerVT), Zero, VL);
11969
11970SDValue SplatValue = DAG.getSignedConstant(
11971Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1,DL, XLenVT);
11972SDValueSplat = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
11973 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11974
11975SDValueResult = DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT, Src,Splat,
11976 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11977if (!VT.isFixedLengthVector())
11978returnResult;
11979returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
11980}
11981
11982SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValueOp,
11983SelectionDAG &DAG) const{
11984SDLocDL(Op);
11985MVT VT =Op.getSimpleValueType();
11986
11987SDValue Op1 =Op.getOperand(0);
11988SDValue Op2 =Op.getOperand(1);
11989ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11990// NOTE: Mask is dropped.
11991SDValue VL =Op.getOperand(4);
11992
11993MVT ContainerVT = VT;
11994if (VT.isFixedLengthVector()) {
11995 ContainerVT =getContainerForFixedLengthVector(VT);
11996 Op1 =convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11997 Op2 =convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11998 }
11999
12000SDValueResult;
12001SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL,DL, ContainerVT, VL);
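  // Compares between mask vectors are implemented with mask-register logic.
  // AllOneMask provides the constant true vector, and each condition code
  // below reduces to a short vmxor/vmand/vmor sequence per the boolean
  // identities noted on the cases.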
12002
12003switch (Condition) {
12004default:
12005break;
12006// X != Y --> (X^Y)
12007caseISD::SETNE:
12008Result = DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Op1, Op2, VL);
12009break;
12010// X == Y --> ~(X^Y)
12011caseISD::SETEQ: {
12012SDValue Temp =
12013 DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Op1, Op2, VL);
12014Result =
12015 DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Temp, AllOneMask, VL);
12016break;
12017 }
12018// X >s Y --> X == 0 & Y == 1 --> ~X & Y
12019// X <u Y --> X == 0 & Y == 1 --> ~X & Y
12020caseISD::SETGT:
12021caseISD::SETULT: {
12022SDValue Temp =
12023 DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Op1, AllOneMask, VL);
12024Result = DAG.getNode(RISCVISD::VMAND_VL,DL, ContainerVT, Temp, Op2, VL);
12025break;
12026 }
12027// X <s Y --> X == 1 & Y == 0 --> ~Y & X
12028// X >u Y --> X == 1 & Y == 0 --> ~Y & X
12029caseISD::SETLT:
12030caseISD::SETUGT: {
12031SDValue Temp =
12032 DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Op2, AllOneMask, VL);
12033Result = DAG.getNode(RISCVISD::VMAND_VL,DL, ContainerVT, Op1, Temp, VL);
12034break;
12035 }
12036// X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12037// X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12038caseISD::SETGE:
12039caseISD::SETULE: {
12040SDValue Temp =
12041 DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Op1, AllOneMask, VL);
12042Result = DAG.getNode(RISCVISD::VMOR_VL,DL, ContainerVT, Temp, Op2, VL);
12043break;
12044 }
12045// X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12046// X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12047caseISD::SETLE:
12048caseISD::SETUGE: {
12049SDValue Temp =
12050 DAG.getNode(RISCVISD::VMXOR_VL,DL, ContainerVT, Op2, AllOneMask, VL);
12051Result = DAG.getNode(RISCVISD::VMOR_VL,DL, ContainerVT, Temp, Op1, VL);
12052break;
12053 }
12054 }
12055
12056if (!VT.isFixedLengthVector())
12057returnResult;
12058returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
12059}
12060
12061// Lower Floating-Point/Integer Type-Convert VP SDNodes
12062SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValueOp,
12063SelectionDAG &DAG) const{
12064SDLocDL(Op);
12065
12066SDValue Src =Op.getOperand(0);
12067SDValueMask =Op.getOperand(1);
12068SDValue VL =Op.getOperand(2);
12069unsigned RISCVISDOpc =getRISCVVLOp(Op);
12070
12071MVT DstVT =Op.getSimpleValueType();
12072MVT SrcVT = Src.getSimpleValueType();
12073if (DstVT.isFixedLengthVector()) {
12074 DstVT =getContainerForFixedLengthVector(DstVT);
12075 SrcVT =getContainerForFixedLengthVector(SrcVT);
12076 Src =convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12077MVT MaskVT =getMaskTypeFor(DstVT);
12078Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12079 }
12080
12081unsigned DstEltSize = DstVT.getScalarSizeInBits();
12082unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12083
12084SDValueResult;
12085if (DstEltSize >= SrcEltSize) {// Single-width and widening conversion.
12086if (SrcVT.isInteger()) {
12087assert(DstVT.isFloatingPoint() &&"Wrong input/output vector types");
12088
12089unsigned RISCVISDExtOpc = RISCVISDOpc ==RISCVISD::SINT_TO_FP_VL
12090 ?RISCVISD::VSEXT_VL
12091 :RISCVISD::VZEXT_VL;
12092
12093// Do we need to do any pre-widening before converting?
12094if (SrcEltSize == 1) {
12095MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12096MVT XLenVT = Subtarget.getXLenVT();
12097SDValueZero = DAG.getConstant(0,DL, XLenVT);
12098SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, IntVT,
12099 DAG.getUNDEF(IntVT), Zero, VL);
12100SDValue One = DAG.getSignedConstant(
12101 RISCVISDExtOpc ==RISCVISD::VZEXT_VL ? 1 : -1,DL, XLenVT);
12102SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, IntVT,
12103 DAG.getUNDEF(IntVT), One, VL);
12104 Src = DAG.getNode(RISCVISD::VMERGE_VL,DL, IntVT, Src, OneSplat,
12105 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12106 }elseif (DstEltSize > (2 * SrcEltSize)) {
12107// Widen before converting.
12108MVT IntVT =MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12109 DstVT.getVectorElementCount());
12110 Src = DAG.getNode(RISCVISDExtOpc,DL, IntVT, Src, Mask, VL);
12111 }
12112
12113Result = DAG.getNode(RISCVISDOpc,DL, DstVT, Src, Mask, VL);
12114 }else {
12115assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12116"Wrong input/output vector types");
12117
12118// Convert f16 to f32 then convert f32 to i64.
12119if (DstEltSize > (2 * SrcEltSize)) {
12120assert(SrcVT.getVectorElementType() == MVT::f16 &&"Unexpected type!");
12121MVT InterimFVT =
12122MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12123 Src =
12124 DAG.getNode(RISCVISD::FP_EXTEND_VL,DL, InterimFVT, Src, Mask, VL);
12125 }
12126
12127Result = DAG.getNode(RISCVISDOpc,DL, DstVT, Src, Mask, VL);
12128 }
12129 }else {// Narrowing + Conversion
12130if (SrcVT.isInteger()) {
12131assert(DstVT.isFloatingPoint() &&"Wrong input/output vector types");
12132// First do a narrowing conversion to an FP type half the size of the source,
12133// then round the result to a smaller FP type if needed.
12134
12135MVT InterimFVT = DstVT;
12136if (SrcEltSize > (2 * DstEltSize)) {
12137assert(SrcEltSize == (4 * DstEltSize) &&"Unexpected types!");
12138assert(DstVT.getVectorElementType() == MVT::f16 &&"Unexpected type!");
12139 InterimFVT =MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12140 }
12141
12142Result = DAG.getNode(RISCVISDOpc,DL, InterimFVT, Src, Mask, VL);
12143
12144if (InterimFVT != DstVT) {
12145 Src =Result;
12146Result = DAG.getNode(RISCVISD::FP_ROUND_VL,DL, DstVT, Src, Mask, VL);
12147 }
12148 }else {
12149assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12150"Wrong input/output vector types");
12151// First do a narrowing conversion to an integer half the size, then
12152// truncate if needed.
12153
12154if (DstEltSize == 1) {
12155// First convert to the same size integer, then convert to mask using
12156// setcc.
12157assert(SrcEltSize >= 16 &&"Unexpected FP type!");
12158MVT InterimIVT =MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12159 DstVT.getVectorElementCount());
12160Result = DAG.getNode(RISCVISDOpc,DL, InterimIVT, Src, Mask, VL);
12161
12162// Compare the integer result to 0. The integer should be 0 or 1/-1,
12163// otherwise the conversion was undefined.
12164MVT XLenVT = Subtarget.getXLenVT();
12165SDValue SplatZero = DAG.getConstant(0,DL, XLenVT);
12166 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, InterimIVT,
12167 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12168Result = DAG.getNode(RISCVISD::SETCC_VL,DL, DstVT,
12169 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12170 DAG.getUNDEF(DstVT), Mask, VL});
12171 }else {
12172MVT InterimIVT =MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12173 DstVT.getVectorElementCount());
12174
12175Result = DAG.getNode(RISCVISDOpc,DL, InterimIVT, Src, Mask, VL);
12176
12177while (InterimIVT != DstVT) {
12178 SrcEltSize /= 2;
12179 Src =Result;
12180 InterimIVT =MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12181 DstVT.getVectorElementCount());
12182Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL,DL, InterimIVT,
12183 Src, Mask, VL);
12184 }
12185 }
12186 }
12187 }
12188
12189MVT VT =Op.getSimpleValueType();
12190if (!VT.isFixedLengthVector())
12191returnResult;
12192returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
12193}
12194
12195SDValue RISCVTargetLowering::lowerVPMergeMask(SDValueOp,
12196SelectionDAG &DAG) const{
12197SDLocDL(Op);
12198MVT VT =Op.getSimpleValueType();
12199MVT XLenVT = Subtarget.getXLenVT();
12200
12201SDValueMask =Op.getOperand(0);
12202SDValueTrueVal =Op.getOperand(1);
12203SDValueFalseVal =Op.getOperand(2);
12204SDValue VL =Op.getOperand(3);
12205
12206// Use default legalization if a vector of EVL type would be legal.
12207EVT EVLVecVT =EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12208 VT.getVectorElementCount());
12209if (isTypeLegal(EVLVecVT))
12210returnSDValue();
12211
12212MVT ContainerVT = VT;
12213if (VT.isFixedLengthVector()) {
12214 ContainerVT =getContainerForFixedLengthVector(VT);
12215Mask =convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12216TrueVal =convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12217FalseVal =convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12218 }
12219
12220// Promote to a vector of i8.
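  // A plain vmerge cannot be used directly on mask-typed operands here, so the
  // masks are promoted to i8 vectors holding 0/1, merged under Mask/VL, and
  // converted back to a mask by comparing the merged bytes against zero.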
12221MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12222
12223// Promote TrueVal and FalseVal using VLMax.
12224// FIXME: Is there a better way to do this?
12225SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12226SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, PromotedVT,
12227 DAG.getUNDEF(PromotedVT),
12228 DAG.getConstant(1,DL, XLenVT), VLMax);
12229SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, PromotedVT,
12230 DAG.getUNDEF(PromotedVT),
12231 DAG.getConstant(0,DL, XLenVT), VLMax);
12232TrueVal = DAG.getNode(RISCVISD::VMERGE_VL,DL, PromotedVT, TrueVal, SplatOne,
12233 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12234// Any element past VL uses FalseVal, so use VLMax
12235FalseVal = DAG.getNode(RISCVISD::VMERGE_VL,DL, PromotedVT, FalseVal,
12236 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12237
12238// VP_MERGE the two promoted values.
12239SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL,DL, PromotedVT, Mask,
12240 TrueVal, FalseVal, FalseVal, VL);
12241
12242// Convert back to mask.
12243SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL,DL, ContainerVT, VL);
12244SDValueResult = DAG.getNode(
12245RISCVISD::SETCC_VL,DL, ContainerVT,
12246 {VPMerge, DAG.getConstant(0,DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12247 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12248
12249if (VT.isFixedLengthVector())
12250Result =convertFromScalableVector(VT, Result, DAG, Subtarget);
12251returnResult;
12252}
12253
12254SDValue
12255RISCVTargetLowering::lowerVPSpliceExperimental(SDValueOp,
12256SelectionDAG &DAG) const{
12257SDLocDL(Op);
12258
12259SDValue Op1 =Op.getOperand(0);
12260SDValue Op2 =Op.getOperand(1);
12261SDValueOffset =Op.getOperand(2);
12262SDValueMask =Op.getOperand(3);
12263SDValue EVL1 =Op.getOperand(4);
12264SDValue EVL2 =Op.getOperand(5);
12265
12266constMVT XLenVT = Subtarget.getXLenVT();
12267MVT VT =Op.getSimpleValueType();
12268MVT ContainerVT = VT;
12269if (VT.isFixedLengthVector()) {
12270 ContainerVT =getContainerForFixedLengthVector(VT);
12271 Op1 =convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12272 Op2 =convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12273MVT MaskVT =getMaskTypeFor(ContainerVT);
12274Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12275 }
12276
12277bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12278if (IsMaskVector) {
12279 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12280
12281// Expand input operands
12282SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
12283 DAG.getUNDEF(ContainerVT),
12284 DAG.getConstant(1,DL, XLenVT), EVL1);
12285SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
12286 DAG.getUNDEF(ContainerVT),
12287 DAG.getConstant(0,DL, XLenVT), EVL1);
12288 Op1 = DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT, Op1, SplatOneOp1,
12289 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12290
12291SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
12292 DAG.getUNDEF(ContainerVT),
12293 DAG.getConstant(1,DL, XLenVT), EVL2);
12294SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
12295 DAG.getUNDEF(ContainerVT),
12296 DAG.getConstant(0,DL, XLenVT), EVL2);
12297 Op2 = DAG.getNode(RISCVISD::VMERGE_VL,DL, ContainerVT, Op2, SplatOneOp2,
12298 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12299 }
12300
12301 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12302SDValue DownOffset, UpOffset;
12303if (ImmValue >= 0) {
12304// The operand is a TargetConstant; we need to rebuild it as a regular
12305// constant.
12306 DownOffset = DAG.getConstant(ImmValue,DL, XLenVT);
12307 UpOffset = DAG.getNode(ISD::SUB,DL, XLenVT, EVL1, DownOffset);
12308 }else {
12309// The operand is a TargetConstant; we need to rebuild it as a regular
12310// constant rather than negating the original operand.
12311 UpOffset = DAG.getConstant(-ImmValue,DL, XLenVT);
12312 DownOffset = DAG.getNode(ISD::SUB,DL, XLenVT, EVL1, UpOffset);
12313 }
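  // The splice is built from two slides: Op1 is slid down by Offset so its
  // elements [Offset, EVL1) move to the front, and Op2 is then slid up by
  // UpOffset = EVL1 - Offset on top of that, so the result is the tail of Op1
  // followed by the elements of Op2.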
12314
12315SDValue SlideDown =
12316getVSlidedown(DAG, Subtarget,DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12317 Op1, DownOffset, Mask, UpOffset);
12318SDValueResult =getVSlideup(DAG, Subtarget,DL, ContainerVT, SlideDown, Op2,
12319 UpOffset, Mask, EVL2,RISCVII::TAIL_AGNOSTIC);
12320
12321if (IsMaskVector) {
12322// Truncate Result back to a mask vector (Result has same EVL as Op2)
12323Result = DAG.getNode(
12324RISCVISD::SETCC_VL,DL, ContainerVT.changeVectorElementType(MVT::i1),
12325 {Result, DAG.getConstant(0, DL, ContainerVT),
12326 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12327 Mask, EVL2});
12328 }
12329
12330if (!VT.isFixedLengthVector())
12331returnResult;
12332returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
12333}
12334
12335SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValueOp,
12336SelectionDAG &DAG) const{
12337SDLocDL(Op);
12338SDValue Val =Op.getOperand(0);
12339SDValueMask =Op.getOperand(1);
12340SDValue VL =Op.getOperand(2);
12341MVT VT =Op.getSimpleValueType();
12342
12343MVT ContainerVT = VT;
12344if (VT.isFixedLengthVector()) {
12345 ContainerVT =getContainerForFixedLengthVector(VT);
12346MVT MaskVT =getMaskTypeFor(ContainerVT);
12347Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12348 }
12349
12350SDValueResult =
12351lowerScalarSplat(SDValue(), Val, VL, ContainerVT,DL, DAG, Subtarget);
12352
12353if (!VT.isFixedLengthVector())
12354returnResult;
12355returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
12356}
12357
12358SDValue
12359RISCVTargetLowering::lowerVPReverseExperimental(SDValueOp,
12360SelectionDAG &DAG) const{
12361SDLocDL(Op);
12362MVT VT =Op.getSimpleValueType();
12363MVT XLenVT = Subtarget.getXLenVT();
12364
12365SDValue Op1 =Op.getOperand(0);
12366SDValueMask =Op.getOperand(1);
12367SDValue EVL =Op.getOperand(2);
12368
12369MVT ContainerVT = VT;
12370if (VT.isFixedLengthVector()) {
12371 ContainerVT =getContainerForFixedLengthVector(VT);
12372 Op1 =convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12373MVT MaskVT =getMaskTypeFor(ContainerVT);
12374Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12375 }
12376
12377MVT GatherVT = ContainerVT;
12378MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12379// Check if we are working with mask vectors
12380bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12381if (IsMaskVector) {
12382 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12383
12384// Expand input operand
12385SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, IndicesVT,
12386 DAG.getUNDEF(IndicesVT),
12387 DAG.getConstant(1,DL, XLenVT), EVL);
12388SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, IndicesVT,
12389 DAG.getUNDEF(IndicesVT),
12390 DAG.getConstant(0,DL, XLenVT), EVL);
12391 Op1 = DAG.getNode(RISCVISD::VMERGE_VL,DL, IndicesVT, Op1, SplatOne,
12392 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12393 }
12394
12395unsigned EltSize = GatherVT.getScalarSizeInBits();
12396unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12397unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12398unsigned MaxVLMAX =
12399RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12400
12401unsigned GatherOpc =RISCVISD::VRGATHER_VV_VL;
12402// If this is SEW=8 and VLMAX is unknown or more than 256, we need
12403// to use vrgatherei16.vv.
12404// TODO: It's also possible to use vrgatherei16.vv for other types to
12405// decrease register width for the index calculation.
12406// NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12407if (MaxVLMAX > 256 && EltSize == 8) {
12408// If this is LMUL=8, we have to split before using vrgatherei16.vv.
12409// Split the vector in half and reverse each half using a full register
12410// reverse.
12411// Swap the halves and concatenate them.
12412// Slide the concatenated result by (VLMax - VL).
12413if (MinSize == (8 *RISCV::RVVBitsPerBlock)) {
12414auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12415auto [Lo,Hi] = DAG.SplitVector(Op1,DL);
12416
12417SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE,DL, LoVT,Lo);
12418SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE,DL, HiVT,Hi);
12419
12420// Reassemble the low and high pieces reversed.
12421// NOTE: this Result is unmasked (because we do not need masks for
12422// shuffles). If in the future this has to change, we can use a SELECT_VL
12423// between Result and UNDEF using the mask originally passed to VP_REVERSE
12424SDValueResult =
12425 DAG.getNode(ISD::CONCAT_VECTORS,DL, GatherVT, HiRev, LoRev);
12426
12427// Slide off any elements from past EVL that were reversed into the low
12428// elements.
12429unsigned MinElts = GatherVT.getVectorMinNumElements();
12430SDValue VLMax =
12431 DAG.getVScale(DL, XLenVT,APInt(XLenVT.getSizeInBits(), MinElts));
12432SDValue Diff = DAG.getNode(ISD::SUB,DL, XLenVT, VLMax, EVL);
12433
12434Result =getVSlidedown(DAG, Subtarget,DL, GatherVT,
12435 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12436
12437if (IsMaskVector) {
12438// Truncate Result back to a mask vector
12439Result =
12440 DAG.getNode(RISCVISD::SETCC_VL,DL, ContainerVT,
12441 {Result, DAG.getConstant(0,DL, GatherVT),
12442 DAG.getCondCode(ISD::SETNE),
12443 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12444 }
12445
12446if (!VT.isFixedLengthVector())
12447returnResult;
12448returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
12449 }
12450
12451// Just promote the int type to i16 which will double the LMUL.
12452 IndicesVT =MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12453 GatherOpc =RISCVISD::VRGATHEREI16_VV_VL;
12454 }
12455
12456SDValue VID = DAG.getNode(RISCVISD::VID_VL,DL, IndicesVT, Mask, EVL);
12457SDValue VecLen =
12458 DAG.getNode(ISD::SUB,DL, XLenVT, EVL, DAG.getConstant(1,DL, XLenVT));
12459SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, IndicesVT,
12460 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12461SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL,DL, IndicesVT, VecLenSplat, VID,
12462 DAG.getUNDEF(IndicesVT), Mask, EVL);
12463SDValueResult = DAG.getNode(GatherOpc,DL, GatherVT, Op1, VRSUB,
12464 DAG.getUNDEF(GatherVT), Mask, EVL);
12465
12466if (IsMaskVector) {
12467// Truncate Result back to a mask vector
12468Result = DAG.getNode(
12469RISCVISD::SETCC_VL,DL, ContainerVT,
12470 {Result, DAG.getConstant(0,DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12471 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12472 }
12473
12474if (!VT.isFixedLengthVector())
12475returnResult;
12476returnconvertFromScalableVector(VT, Result, DAG, Subtarget);
12477}
12478
12479SDValue RISCVTargetLowering::lowerLogicVPOp(SDValueOp,
12480SelectionDAG &DAG) const{
12481MVT VT =Op.getSimpleValueType();
12482if (VT.getVectorElementType() != MVT::i1)
12483return lowerVPOp(Op, DAG);
12484
12485// It is safe to drop the mask parameter as masked-off elements are undef.
12486SDValue Op1 =Op->getOperand(0);
12487SDValue Op2 =Op->getOperand(1);
12488SDValue VL =Op->getOperand(3);
12489
12490MVT ContainerVT = VT;
12491constbool IsFixed = VT.isFixedLengthVector();
12492if (IsFixed) {
12493 ContainerVT =getContainerForFixedLengthVector(VT);
12494 Op1 =convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12495 Op2 =convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12496 }
12497
12498SDLocDL(Op);
12499SDValue Val = DAG.getNode(getRISCVVLOp(Op),DL, ContainerVT, Op1, Op2, VL);
12500if (!IsFixed)
12501return Val;
12502returnconvertFromScalableVector(VT, Val, DAG, Subtarget);
12503}
12504
12505SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValueOp,
12506SelectionDAG &DAG) const{
12507SDLocDL(Op);
12508MVT XLenVT = Subtarget.getXLenVT();
12509MVT VT =Op.getSimpleValueType();
12510MVT ContainerVT = VT;
12511if (VT.isFixedLengthVector())
12512 ContainerVT =getContainerForFixedLengthVector(VT);
12513
12514SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12515
12516auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12517// Check if the mask is known to be all ones
12518SDValueMask = VPNode->getMask();
12519bool IsUnmasked =ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12520
12521SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12522 : Intrinsic::riscv_vlse_mask,
12523DL, XLenVT);
12524SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12525 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12526 VPNode->getStride()};
12527if (!IsUnmasked) {
12528if (VT.isFixedLengthVector()) {
12529MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12530Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12531 }
12532 Ops.push_back(Mask);
12533 }
12534 Ops.push_back(VPNode->getVectorLength());
12535if (!IsUnmasked) {
12536SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC,DL, XLenVT);
12537 Ops.push_back(Policy);
12538 }
12539
12540SDValueResult =
12541 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,DL, VTs, Ops,
12542 VPNode->getMemoryVT(), VPNode->getMemOperand());
12543SDValue Chain =Result.getValue(1);
12544
12545if (VT.isFixedLengthVector())
12546Result =convertFromScalableVector(VT, Result, DAG, Subtarget);
12547
12548return DAG.getMergeValues({Result, Chain},DL);
12549}
12550
12551SDValue RISCVTargetLowering::lowerVPStridedStore(SDValueOp,
12552SelectionDAG &DAG) const{
12553SDLocDL(Op);
12554MVT XLenVT = Subtarget.getXLenVT();
12555
12556auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12557SDValue StoreVal = VPNode->getValue();
12558MVT VT = StoreVal.getSimpleValueType();
12559MVT ContainerVT = VT;
12560if (VT.isFixedLengthVector()) {
12561 ContainerVT =getContainerForFixedLengthVector(VT);
12562 StoreVal =convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12563 }
12564
12565// Check if the mask is known to be all ones
12566SDValueMask = VPNode->getMask();
12567bool IsUnmasked =ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12568
12569SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12570 : Intrinsic::riscv_vsse_mask,
12571DL, XLenVT);
12572SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12573 VPNode->getBasePtr(), VPNode->getStride()};
12574if (!IsUnmasked) {
12575if (VT.isFixedLengthVector()) {
12576MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12577Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12578 }
12579 Ops.push_back(Mask);
12580 }
12581 Ops.push_back(VPNode->getVectorLength());
12582
12583return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,DL, VPNode->getVTList(),
12584 Ops, VPNode->getMemoryVT(),
12585 VPNode->getMemOperand());
12586}
12587
12588// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12589// matched to a RVV indexed load. The RVV indexed load instructions only
12590// support the "unsigned unscaled" addressing mode; indices are implicitly
12591// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12592// signed or scaled indexing is extended to the XLEN value type and scaled
12593// accordingly.
12594SDValue RISCVTargetLowering::lowerMaskedGather(SDValueOp,
12595SelectionDAG &DAG) const{
12596SDLocDL(Op);
12597MVT VT =Op.getSimpleValueType();
12598
12599constauto *MemSD = cast<MemSDNode>(Op.getNode());
12600EVT MemVT = MemSD->getMemoryVT();
12601MachineMemOperand *MMO = MemSD->getMemOperand();
12602SDValue Chain = MemSD->getChain();
12603SDValueBasePtr = MemSD->getBasePtr();
12604
12605 [[maybe_unused]]ISD::LoadExtTypeLoadExtType;
12606SDValueIndex,Mask, PassThru, VL;
12607
12608if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12609Index = VPGN->getIndex();
12610Mask = VPGN->getMask();
12611 PassThru = DAG.getUNDEF(VT);
12612 VL = VPGN->getVectorLength();
12613// VP doesn't support extending loads.
12614LoadExtType =ISD::NON_EXTLOAD;
12615 }else {
12616// Else it must be a MGATHER.
12617auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12618Index = MGN->getIndex();
12619Mask = MGN->getMask();
12620 PassThru = MGN->getPassThru();
12621LoadExtType = MGN->getExtensionType();
12622 }
12623
12624MVT IndexVT =Index.getSimpleValueType();
12625MVT XLenVT = Subtarget.getXLenVT();
12626
12627assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12628"Unexpected VTs!");
12629assert(BasePtr.getSimpleValueType() == XLenVT &&"Unexpected pointer type");
12630// Targets have to explicitly opt-in for extending vector loads.
12631assert(LoadExtType ==ISD::NON_EXTLOAD &&
12632"Unexpected extending MGATHER/VP_GATHER");
12633
12634// If the mask is known to be all ones, optimize to an unmasked intrinsic;
12635// the selection of the masked intrinsics doesn't do this for us.
12636bool IsUnmasked =ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12637
12638MVT ContainerVT = VT;
12639if (VT.isFixedLengthVector()) {
12640 ContainerVT =getContainerForFixedLengthVector(VT);
12641 IndexVT =MVT::getVectorVT(IndexVT.getVectorElementType(),
12642 ContainerVT.getVectorElementCount());
12643
12644Index =convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12645
12646if (!IsUnmasked) {
12647MVT MaskVT =getMaskTypeFor(ContainerVT);
12648Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12649 PassThru =convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12650 }
12651 }
12652
12653if (!VL)
12654 VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
12655
12656if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12657 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12658Index = DAG.getNode(ISD::TRUNCATE,DL, IndexVT, Index);
12659 }
12660
12661unsigned IntID =
12662 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12663SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID,DL, XLenVT)};
12664if (IsUnmasked)
12665 Ops.push_back(DAG.getUNDEF(ContainerVT));
12666else
12667 Ops.push_back(PassThru);
12668 Ops.push_back(BasePtr);
12669 Ops.push_back(Index);
12670if (!IsUnmasked)
12671 Ops.push_back(Mask);
12672 Ops.push_back(VL);
12673if (!IsUnmasked)
12674 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC,DL, XLenVT));
12675
12676SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12677SDValueResult =
12678 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,DL, VTs, Ops, MemVT, MMO);
12679 Chain =Result.getValue(1);
12680
12681if (VT.isFixedLengthVector())
12682Result =convertFromScalableVector(VT, Result, DAG, Subtarget);
12683
12684return DAG.getMergeValues({Result, Chain},DL);
12685}
12686
12687// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12688// matched to a RVV indexed store. The RVV indexed store instructions only
12689// support the "unsigned unscaled" addressing mode; indices are implicitly
12690// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12691// signed or scaled indexing is extended to the XLEN value type and scaled
12692// accordingly.
12693SDValue RISCVTargetLowering::lowerMaskedScatter(SDValueOp,
12694SelectionDAG &DAG) const{
12695SDLocDL(Op);
12696constauto *MemSD = cast<MemSDNode>(Op.getNode());
12697EVT MemVT = MemSD->getMemoryVT();
12698MachineMemOperand *MMO = MemSD->getMemOperand();
12699SDValue Chain = MemSD->getChain();
12700SDValueBasePtr = MemSD->getBasePtr();
12701
12702 [[maybe_unused]]bool IsTruncatingStore =false;
12703SDValueIndex,Mask, Val, VL;
12704
12705if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12706Index = VPSN->getIndex();
12707Mask = VPSN->getMask();
12708 Val = VPSN->getValue();
12709 VL = VPSN->getVectorLength();
12710// VP doesn't support truncating stores.
12711 IsTruncatingStore =false;
12712 }else {
12713// Else it must be a MSCATTER.
12714auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12715Index = MSN->getIndex();
12716Mask = MSN->getMask();
12717 Val = MSN->getValue();
12718 IsTruncatingStore = MSN->isTruncatingStore();
12719 }
12720
12721MVT VT = Val.getSimpleValueType();
12722MVT IndexVT =Index.getSimpleValueType();
12723MVT XLenVT = Subtarget.getXLenVT();
12724
12725assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12726"Unexpected VTs!");
12727assert(BasePtr.getSimpleValueType() == XLenVT &&"Unexpected pointer type");
12728// Targets have to explicitly opt-in for extending vector loads and
12729// truncating vector stores.
12730assert(!IsTruncatingStore &&"Unexpected truncating MSCATTER/VP_SCATTER");
12731
12732// If the mask is known to be all ones, optimize to an unmasked intrinsic;
12733// the selection of the masked intrinsics doesn't do this for us.
12734bool IsUnmasked =ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12735
12736MVT ContainerVT = VT;
12737if (VT.isFixedLengthVector()) {
12738 ContainerVT =getContainerForFixedLengthVector(VT);
12739 IndexVT =MVT::getVectorVT(IndexVT.getVectorElementType(),
12740 ContainerVT.getVectorElementCount());
12741
12742Index =convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12743 Val =convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12744
12745if (!IsUnmasked) {
12746MVT MaskVT =getMaskTypeFor(ContainerVT);
12747Mask =convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12748 }
12749 }
12750
12751if (!VL)
12752 VL =getDefaultVLOps(VT, ContainerVT,DL, DAG, Subtarget).second;
12753
12754if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12755 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12756Index = DAG.getNode(ISD::TRUNCATE,DL, IndexVT, Index);
12757 }
12758
12759unsigned IntID =
12760 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12761SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID,DL, XLenVT)};
12762 Ops.push_back(Val);
12763 Ops.push_back(BasePtr);
12764 Ops.push_back(Index);
12765if (!IsUnmasked)
12766 Ops.push_back(Mask);
12767 Ops.push_back(VL);
12768
12769return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,DL,
12770 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12771}
12772
12773SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValueOp,
12774SelectionDAG &DAG) const{
12775constMVT XLenVT = Subtarget.getXLenVT();
12776SDLocDL(Op);
12777SDValue Chain =Op->getOperand(0);
12778SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm,DL, XLenVT);
12779SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12780SDValueRM = DAG.getNode(RISCVISD::READ_CSR,DL, VTs, Chain, SysRegNo);
12781
12782// The encoding used for the rounding mode in RISC-V differs from that used in
12783// FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
12784// a table, which consists of a sequence of 4-bit fields, each representing the
12785// corresponding FLT_ROUNDS mode.
12786staticconstint Table =
12787 (int(RoundingMode::NearestTiesToEven) << 4 *RISCVFPRndMode::RNE) |
12788 (int(RoundingMode::TowardZero) << 4 *RISCVFPRndMode::RTZ) |
12789 (int(RoundingMode::TowardNegative) << 4 *RISCVFPRndMode::RDN) |
12790 (int(RoundingMode::TowardPositive) << 4 *RISCVFPRndMode::RUP) |
12791 (int(RoundingMode::NearestTiesToAway) << 4 *RISCVFPRndMode::RMM);
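  // For example, with frm = RTZ (1) the shift below is 1 * 4 = 4, and
  // (Table >> 4) & 7 extracts int(RoundingMode::TowardZero).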
12792
12793SDValue Shift =
12794 DAG.getNode(ISD::SHL,DL, XLenVT, RM, DAG.getConstant(2,DL, XLenVT));
12795SDValue Shifted = DAG.getNode(ISD::SRL,DL, XLenVT,
12796 DAG.getConstant(Table,DL, XLenVT), Shift);
12797SDValueMasked = DAG.getNode(ISD::AND,DL, XLenVT, Shifted,
12798 DAG.getConstant(7,DL, XLenVT));
12799
12800return DAG.getMergeValues({Masked, Chain},DL);
12801}
12802
12803SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValueOp,
12804SelectionDAG &DAG) const{
12805constMVT XLenVT = Subtarget.getXLenVT();
12806SDLocDL(Op);
12807SDValue Chain =Op->getOperand(0);
12808SDValue RMValue =Op->getOperand(1);
12809SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm,DL, XLenVT);
12810
12811// The encoding used for the rounding mode in RISC-V differs from that used in
12812// FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12813// a table, which consists of a sequence of 4-bit fields, each representing the
12814// corresponding RISC-V mode.
12815staticconstunsigned Table =
12816 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12817 (RISCVFPRndMode::RTZ << 4 *int(RoundingMode::TowardZero)) |
12818 (RISCVFPRndMode::RDN << 4 *int(RoundingMode::TowardNegative)) |
12819 (RISCVFPRndMode::RUP << 4 *int(RoundingMode::TowardPositive)) |
12820 (RISCVFPRndMode::RMM << 4 *int(RoundingMode::NearestTiesToAway));
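  // This is the inverse of the table in lowerGET_ROUNDING: the incoming
  // FLT_ROUNDS value selects the 4-bit field holding the matching frm
  // encoding, e.g. RoundingMode::TowardZero maps to RISCVFPRndMode::RTZ.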
12821
12822 RMValue = DAG.getNode(ISD::ZERO_EXTEND,DL, XLenVT, RMValue);
12823
12824SDValue Shift = DAG.getNode(ISD::SHL,DL, XLenVT, RMValue,
12825 DAG.getConstant(2,DL, XLenVT));
12826SDValue Shifted = DAG.getNode(ISD::SRL,DL, XLenVT,
12827 DAG.getConstant(Table,DL, XLenVT), Shift);
12828 RMValue = DAG.getNode(ISD::AND,DL, XLenVT, Shifted,
12829 DAG.getConstant(0x7,DL, XLenVT));
12830return DAG.getNode(RISCVISD::WRITE_CSR,DL, MVT::Other, Chain, SysRegNo,
12831 RMValue);
12832}
12833
12834SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValueOp,
12835SelectionDAG &DAG) const{
12836MachineFunction &MF = DAG.getMachineFunction();
12837
12838bool isRISCV64 = Subtarget.is64Bit();
12839EVT PtrVT =getPointerTy(DAG.getDataLayout());
12840
12841int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0,false);
12842return DAG.getFrameIndex(FI, PtrVT);
12843}
12844
12845// Returns the opcode of the target-specific SDNode that implements the 32-bit
12846// form of the given Opcode.
12847staticRISCVISD::NodeTypegetRISCVWOpcode(unsigned Opcode) {
12848switch (Opcode) {
12849default:
12850llvm_unreachable("Unexpected opcode");
12851caseISD::SHL:
12852returnRISCVISD::SLLW;
12853caseISD::SRA:
12854returnRISCVISD::SRAW;
12855caseISD::SRL:
12856returnRISCVISD::SRLW;
12857caseISD::SDIV:
12858returnRISCVISD::DIVW;
12859caseISD::UDIV:
12860returnRISCVISD::DIVUW;
12861caseISD::UREM:
12862returnRISCVISD::REMUW;
12863caseISD::ROTL:
12864returnRISCVISD::ROLW;
12865caseISD::ROTR:
12866returnRISCVISD::RORW;
12867 }
12868}
12869
12870// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12871// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12872// otherwise be promoted to i64, making it difficult to select the
12873// SLLW/DIVUW/.../*W later on because the fact that the operation was originally
12874// of type i8/i16/i32 is lost.
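// For example, (i32 (srl x, y)) on RV64 becomes
// (trunc (RISCVISD::SRLW (any_ext x), (any_ext y))); SRLW only reads the low
// 32 bits of its operands and sign-extends its 32-bit result, so any_ext of
// the inputs is sufficient.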
12875staticSDValuecustomLegalizeToWOp(SDNode *N,SelectionDAG &DAG,
12876unsigned ExtOpc =ISD::ANY_EXTEND) {
12877SDLocDL(N);
12878RISCVISD::NodeType WOpcode =getRISCVWOpcode(N->getOpcode());
12879SDValue NewOp0 = DAG.getNode(ExtOpc,DL, MVT::i64,N->getOperand(0));
12880SDValue NewOp1 = DAG.getNode(ExtOpc,DL, MVT::i64,N->getOperand(1));
12881SDValue NewRes = DAG.getNode(WOpcode,DL, MVT::i64, NewOp0, NewOp1);
12882// ReplaceNodeResults requires we maintain the same type for the return value.
12883return DAG.getNode(ISD::TRUNCATE,DL,N->getValueType(0), NewRes);
12884}
12885
12886// Converts the given 32-bit operation to an i64 operation with sign extension
12887// semantics to reduce the number of sign extension instructions.
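// The sign_extend_inreg on the i64 result records that the value is already
// sign-extended from bit 31, which matches what the *W instructions produce
// and lets later combines remove redundant sext.w instructions.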
12888staticSDValuecustomLegalizeToWOpWithSExt(SDNode *N,SelectionDAG &DAG) {
12889SDLocDL(N);
12890SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(0));
12891SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
12892SDValue NewWOp = DAG.getNode(N->getOpcode(),DL, MVT::i64, NewOp0, NewOp1);
12893SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, NewWOp,
12894 DAG.getValueType(MVT::i32));
12895return DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, NewRes);
12896}
12897
12898voidRISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12899SmallVectorImpl<SDValue> &Results,
12900SelectionDAG &DAG) const{
12901SDLocDL(N);
12902switch (N->getOpcode()) {
12903default:
12904llvm_unreachable("Don't know how to custom type legalize this operation!");
12905caseISD::STRICT_FP_TO_SINT:
12906caseISD::STRICT_FP_TO_UINT:
12907caseISD::FP_TO_SINT:
12908caseISD::FP_TO_UINT: {
12909assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12910"Unexpected custom legalisation");
12911bool IsStrict =N->isStrictFPOpcode();
12912bool IsSigned =N->getOpcode() ==ISD::FP_TO_SINT ||
12913N->getOpcode() ==ISD::STRICT_FP_TO_SINT;
12914SDValue Op0 = IsStrict ?N->getOperand(1) :N->getOperand(0);
12915if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12916TargetLowering::TypeSoftenFloat) {
12917if (!isTypeLegal(Op0.getValueType()))
12918return;
12919if (IsStrict) {
12920SDValue Chain =N->getOperand(0);
12921// In the absence of Zfh, promote f16 to f32, then convert.
12922if (Op0.getValueType() == MVT::f16 &&
12923 !Subtarget.hasStdExtZfhOrZhinx()) {
12924 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND,DL, {MVT::f32, MVT::Other},
12925 {Chain, Op0});
12926 Chain = Op0.getValue(1);
12927 }
12928unsigned Opc = IsSigned ?RISCVISD::STRICT_FCVT_W_RV64
12929 :RISCVISD::STRICT_FCVT_WU_RV64;
12930SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12931SDValue Res = DAG.getNode(
12932 Opc,DL, VTs, Chain, Op0,
12933 DAG.getTargetConstant(RISCVFPRndMode::RTZ,DL, MVT::i64));
12934Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
12935Results.push_back(Res.getValue(1));
12936return;
12937 }
12938// For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12939// convert.
12940if ((Op0.getValueType() == MVT::f16 &&
12941 !Subtarget.hasStdExtZfhOrZhinx()) ||
12942 Op0.getValueType() == MVT::bf16)
12943 Op0 = DAG.getNode(ISD::FP_EXTEND,DL, MVT::f32, Op0);
12944
12945unsigned Opc = IsSigned ?RISCVISD::FCVT_W_RV64 :RISCVISD::FCVT_WU_RV64;
12946SDValue Res =
12947 DAG.getNode(Opc,DL, MVT::i64, Op0,
12948 DAG.getTargetConstant(RISCVFPRndMode::RTZ,DL, MVT::i64));
12949Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
12950return;
12951 }
12952// If the FP type needs to be softened, emit a library call using the 'si'
12953// version. If we left it to default legalization we'd end up with 'di'. If
12954// the FP type doesn't need to be softened just let generic type
12955// legalization promote the result type.
12956RTLIB::Libcall LC;
12957if (IsSigned)
12958 LC =RTLIB::getFPTOSINT(Op0.getValueType(),N->getValueType(0));
12959else
12960 LC =RTLIB::getFPTOUINT(Op0.getValueType(),N->getValueType(0));
12961MakeLibCallOptions CallOptions;
12962EVT OpVT = Op0.getValueType();
12963 CallOptions.setTypeListBeforeSoften(OpVT,N->getValueType(0),true);
12964SDValue Chain = IsStrict ?N->getOperand(0) :SDValue();
12965SDValue Result;
12966 std::tie(Result, Chain) =
12967makeLibCall(DAG, LC,N->getValueType(0), Op0, CallOptions,DL, Chain);
12968Results.push_back(Result);
12969if (IsStrict)
12970Results.push_back(Chain);
12971break;
12972 }
12973caseISD::LROUND: {
12974SDValue Op0 =N->getOperand(0);
12975EVT Op0VT = Op0.getValueType();
12976if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12977TargetLowering::TypeSoftenFloat) {
12978if (!isTypeLegal(Op0VT))
12979return;
12980
12981// In the absence of Zfh, promote f16 to f32, then convert.
12982if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12983 Op0 = DAG.getNode(ISD::FP_EXTEND,DL, MVT::f32, Op0);
12984
12985SDValue Res =
12986 DAG.getNode(RISCVISD::FCVT_W_RV64,DL, MVT::i64, Op0,
12987 DAG.getTargetConstant(RISCVFPRndMode::RMM,DL, MVT::i64));
12988Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
12989return;
12990 }
12991// If the FP type needs to be softened, emit a library call to lround. We'll
12992// need to truncate the result. We assume any value that doesn't fit in i32
12993// is allowed to return an unspecified value.
12994RTLIB::Libcall LC =
12995 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12996MakeLibCallOptions CallOptions;
12997EVT OpVT = Op0.getValueType();
12998 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64,true);
12999SDValue Result =makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions,DL).first;
13000 Result = DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Result);
13001Results.push_back(Result);
13002break;
13003 }
13004caseISD::READCYCLECOUNTER:
13005caseISD::READSTEADYCOUNTER: {
13006assert(!Subtarget.is64Bit() &&"READCYCLECOUNTER/READSTEADYCOUNTER only "
13007"has custom type legalization on riscv32");
13008
13009SDValue LoCounter, HiCounter;
13010MVT XLenVT = Subtarget.getXLenVT();
13011if (N->getOpcode() ==ISD::READCYCLECOUNTER) {
13012 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle,DL, XLenVT);
13013 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh,DL, XLenVT);
13014 }else {
13015 LoCounter = DAG.getTargetConstant(RISCVSysReg::time,DL, XLenVT);
13016 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh,DL, XLenVT);
13017 }
13018SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
13019SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE,DL, VTs,
13020N->getOperand(0), LoCounter, HiCounter);
13021
13022Results.push_back(
13023 DAG.getNode(ISD::BUILD_PAIR,DL, MVT::i64, RCW, RCW.getValue(1)));
13024Results.push_back(RCW.getValue(2));
13025break;
13026 }
13027caseISD::LOAD: {
13028if (!ISD::isNON_EXTLoad(N))
13029return;
13030
13031// Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13032// sext_inreg we emit for ADD/SUB/MUL/SLLI.
13033LoadSDNode *Ld = cast<LoadSDNode>(N);
13034
13035SDLoc dl(N);
13036SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13037 Ld->getBasePtr(), Ld->getMemoryVT(),
13038 Ld->getMemOperand());
13039Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13040Results.push_back(Res.getValue(1));
13041return;
13042 }
13043caseISD::MUL: {
13044unsignedSize =N->getSimpleValueType(0).getSizeInBits();
13045unsigned XLen = Subtarget.getXLen();
13046// This multiply needs to be expanded; try to use MULHSU+MUL if possible.
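// For example, on RV64 an i128 multiply where one operand is zero extended
// from i64 and the other is sign extended from i64 can be lowered as one MUL
// for the low 64 bits plus one MULHSU for the high 64 bits (this is what the
// MakeMULPair helper below builds).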
13047if (Size > XLen) {
13048assert(Size == (XLen * 2) &&"Unexpected custom legalisation");
13049SDValueLHS =N->getOperand(0);
13050SDValueRHS =N->getOperand(1);
13051APInt HighMask =APInt::getHighBitsSet(Size, XLen);
13052
13053bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13054bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13055// We need exactly one side to be unsigned.
13056if (LHSIsU == RHSIsU)
13057return;
13058
13059auto MakeMULPair = [&](SDValue S,SDValue U) {
13060MVT XLenVT = Subtarget.getXLenVT();
13061 S = DAG.getNode(ISD::TRUNCATE,DL, XLenVT, S);
13062 U = DAG.getNode(ISD::TRUNCATE,DL, XLenVT, U);
13063SDValueLo = DAG.getNode(ISD::MUL,DL, XLenVT, S, U);
13064SDValueHi = DAG.getNode(RISCVISD::MULHSU,DL, XLenVT, S, U);
13065return DAG.getNode(ISD::BUILD_PAIR,DL,N->getValueType(0),Lo,Hi);
13066 };
13067
13068bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13069bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13070
13071// The other operand should be signed, but still prefer MULH when
13072// possible.
13073if (RHSIsU && LHSIsS && !RHSIsS)
13074Results.push_back(MakeMULPair(LHS,RHS));
13075elseif (LHSIsU && RHSIsS && !LHSIsS)
13076Results.push_back(MakeMULPair(RHS,LHS));
13077
13078return;
13079 }
13080 [[fallthrough]];
13081 }
13082caseISD::ADD:
13083caseISD::SUB:
13084assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13085"Unexpected custom legalisation");
13086Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13087break;
13088caseISD::SHL:
13089caseISD::SRA:
13090caseISD::SRL:
13091assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13092"Unexpected custom legalisation");
13093if (N->getOperand(1).getOpcode() !=ISD::Constant) {
13094// If we can use a BSET instruction, allow default promotion to apply.
13095if (N->getOpcode() ==ISD::SHL && Subtarget.hasStdExtZbs() &&
13096isOneConstant(N->getOperand(0)))
13097break;
13098Results.push_back(customLegalizeToWOp(N, DAG));
13099break;
13100 }
13101
13102// Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13103// similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13104// shift amount.
13105if (N->getOpcode() ==ISD::SHL) {
13106SDLocDL(N);
13107SDValue NewOp0 =
13108 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(0));
13109SDValue NewOp1 =
13110 DAG.getNode(ISD::ZERO_EXTEND,DL, MVT::i64,N->getOperand(1));
13111SDValue NewWOp = DAG.getNode(ISD::SHL,DL, MVT::i64, NewOp0, NewOp1);
13112SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, NewWOp,
13113 DAG.getValueType(MVT::i32));
13114Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, NewRes));
13115 }
13116
13117break;
13118caseISD::ROTL:
13119caseISD::ROTR:
13120assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13121"Unexpected custom legalisation");
13122assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13123 Subtarget.hasVendorXTHeadBb()) &&
13124"Unexpected custom legalization");
13125if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13126 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13127return;
13128Results.push_back(customLegalizeToWOp(N, DAG));
13129break;
13130caseISD::CTTZ:
13131caseISD::CTTZ_ZERO_UNDEF:
13132caseISD::CTLZ:
13133caseISD::CTLZ_ZERO_UNDEF: {
13134assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13135"Unexpected custom legalisation");
13136
13137SDValue NewOp0 =
13138 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(0));
13139bool IsCTZ =
13140N->getOpcode() ==ISD::CTTZ ||N->getOpcode() ==ISD::CTTZ_ZERO_UNDEF;
13141unsigned Opc = IsCTZ ?RISCVISD::CTZW :RISCVISD::CLZW;
13142SDValue Res = DAG.getNode(Opc,DL, MVT::i64, NewOp0);
13143Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13144return;
13145 }
13146caseISD::SDIV:
13147caseISD::UDIV:
13148caseISD::UREM: {
13149MVT VT =N->getSimpleValueType(0);
13150assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13151 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13152"Unexpected custom legalisation");
13153// Don't promote division/remainder by a constant since we should expand those
13154// to a multiply by a magic constant.
13155AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13156if (N->getOperand(1).getOpcode() ==ISD::Constant &&
13157 !isIntDivCheap(N->getValueType(0), Attr))
13158return;
13159
13160// If the input is i32, use ANY_EXTEND since the W instructions don't read
13161// the upper 32 bits. For other types we need to sign or zero extend
13162// based on the opcode.
13163unsigned ExtOpc =ISD::ANY_EXTEND;
13164if (VT != MVT::i32)
13165 ExtOpc =N->getOpcode() ==ISD::SDIV ?ISD::SIGN_EXTEND
13166 :ISD::ZERO_EXTEND;
13167
13168Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13169break;
13170 }
13171caseISD::SADDO: {
13172assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13173"Unexpected custom legalisation");
13174
13175// If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13176// use the default legalization.
13177if (!isa<ConstantSDNode>(N->getOperand(1)))
13178return;
13179
13180SDValueLHS = DAG.getNode(ISD::SIGN_EXTEND,DL, MVT::i64,N->getOperand(0));
13181SDValueRHS = DAG.getNode(ISD::SIGN_EXTEND,DL, MVT::i64,N->getOperand(1));
13182SDValue Res = DAG.getNode(ISD::ADD,DL, MVT::i64,LHS,RHS);
13183 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, Res,
13184 DAG.getValueType(MVT::i32));
13185
13186SDValue Zero = DAG.getConstant(0,DL, MVT::i64);
13187
13188// For an addition, the result should be less than one of the operands (LHS)
13189// if and only if the other operand (RHS) is negative, otherwise there will
13190// be overflow.
13191// For a subtraction, the result should be less than one of the operands
13192// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13193// otherwise there will be overflow.
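// Worked example (i32 add): LHS = 0x7fffffff, RHS = 1. The sign-extended
// result is 0x80000000, which compares less than LHS while RHS is
// non-negative, so the xor of the two setccs below yields 1 (overflow).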
13194EVT OType =N->getValueType(1);
13195SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res,LHS,ISD::SETLT);
13196SDValue ConditionRHS = DAG.getSetCC(DL, OType,RHS, Zero,ISD::SETLT);
13197
13198SDValue Overflow =
13199 DAG.getNode(ISD::XOR,DL, OType, ConditionRHS, ResultLowerThanLHS);
13200Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13201Results.push_back(Overflow);
13202return;
13203 }
13204caseISD::UADDO:
13205caseISD::USUBO: {
13206assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13207"Unexpected custom legalisation");
13208bool IsAdd =N->getOpcode() ==ISD::UADDO;
13209// Create an ADDW or SUBW.
13210SDValueLHS = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(0));
13211SDValueRHS = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13212SDValue Res =
13213 DAG.getNode(IsAdd ?ISD::ADD :ISD::SUB,DL, MVT::i64,LHS,RHS);
13214 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, Res,
13215 DAG.getValueType(MVT::i32));
13216
13217SDValue Overflow;
13218if (IsAdd &&isOneConstant(RHS)) {
13219// Special case uaddo X, 1 overflowed if the addition result is 0.
13220// The general case (X + C) < C is not necessarily beneficial. Although we
13221// reduce the live range of X, we may introduce the materialization of
13222// constant C, especially when the setcc result is used by a branch. We have
13223// no compare-with-constant-and-branch instructions.
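// For example, with X = 0xffffffff the i32 addition X + 1 wraps to 0, and
// that is the only input for which it overflows, so Res == 0 is an exact
// overflow test.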
13224 Overflow = DAG.getSetCC(DL,N->getValueType(1), Res,
13225 DAG.getConstant(0,DL, MVT::i64),ISD::SETEQ);
13226 }elseif (IsAdd &&isAllOnesConstant(RHS)) {
13227// Special case uaddo X, -1 overflowed if X != 0.
13228 Overflow = DAG.getSetCC(DL,N->getValueType(1),N->getOperand(0),
13229 DAG.getConstant(0,DL, MVT::i32),ISD::SETNE);
13230 }else {
13231// Sign extend the LHS and perform an unsigned compare with the ADDW
13232// result. Since the inputs are sign extended from i32, this is equivalent
13233// to comparing the lower 32 bits.
13234LHS = DAG.getNode(ISD::SIGN_EXTEND,DL, MVT::i64,N->getOperand(0));
13235 Overflow = DAG.getSetCC(DL,N->getValueType(1), Res,LHS,
13236 IsAdd ?ISD::SETULT :ISD::SETUGT);
13237 }
13238
13239Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13240Results.push_back(Overflow);
13241return;
13242 }
13243caseISD::UADDSAT:
13244caseISD::USUBSAT: {
13245assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13246 !Subtarget.hasStdExtZbb() &&"Unexpected custom legalisation");
13247// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13248// promotion for UADDO/USUBO.
13249Results.push_back(expandAddSubSat(N, DAG));
13250return;
13251 }
13252caseISD::SADDSAT:
13253caseISD::SSUBSAT: {
13254assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13255"Unexpected custom legalisation");
13256Results.push_back(expandAddSubSat(N, DAG));
13257return;
13258 }
13259caseISD::ABS: {
13260assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13261"Unexpected custom legalisation");
13262
13263if (Subtarget.hasStdExtZbb()) {
13264// Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13265// This allows us to remember that the result is sign extended. Expanding
13266// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13267SDValue Src = DAG.getNode(ISD::SIGN_EXTEND,DL, MVT::i64,
13268N->getOperand(0));
13269SDValue Abs = DAG.getNode(RISCVISD::ABSW,DL, MVT::i64, Src);
13270Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Abs));
13271return;
13272 }
13273
13274// Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
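// For example, X = -5: Y = -1 (all sign bits), xor(X, Y) = 4, and
// sub(4, Y) = 5. For non-negative X, Y = 0 and the value passes through
// unchanged.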
13275SDValue Src = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(0));
13276
13277// Freeze the source so we can increase its use count.
13278 Src = DAG.getFreeze(Src);
13279
13280// Copy sign bit to all bits using the sraiw pattern.
13281SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, Src,
13282 DAG.getValueType(MVT::i32));
13283 SignFill = DAG.getNode(ISD::SRA,DL, MVT::i64, SignFill,
13284 DAG.getConstant(31,DL, MVT::i64));
13285
13286SDValue NewRes = DAG.getNode(ISD::XOR,DL, MVT::i64, Src, SignFill);
13287 NewRes = DAG.getNode(ISD::SUB,DL, MVT::i64, NewRes, SignFill);
13288
13289// NOTE: The result is only required to be anyextended, but sext is
13290// consistent with type legalization of sub.
13291 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, NewRes,
13292 DAG.getValueType(MVT::i32));
13293Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, NewRes));
13294return;
13295 }
13296caseISD::BITCAST: {
13297EVT VT =N->getValueType(0);
13298assert(VT.isInteger() && !VT.isVector() &&"Unexpected VT!");
13299SDValue Op0 =N->getOperand(0);
13300EVT Op0VT = Op0.getValueType();
13301MVT XLenVT = Subtarget.getXLenVT();
13302if (VT == MVT::i16 &&
13303 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13304 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13305SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, XLenVT, Op0);
13306Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i16, FPConv));
13307 }elseif (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13308 Subtarget.hasStdExtFOrZfinx()) {
13309SDValue FPConv =
13310 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64,DL, MVT::i64, Op0);
13311Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, FPConv));
13312 }elseif (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13313 Subtarget.hasStdExtDOrZdinx()) {
13314SDValue NewReg = DAG.getNode(RISCVISD::SplitF64,DL,
13315 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13316SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR,DL, MVT::i64,
13317 NewReg.getValue(0), NewReg.getValue(1));
13318Results.push_back(RetReg);
13319 }elseif (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13320isTypeLegal(Op0VT)) {
13321// Custom-legalize bitcasts from fixed-length vector types to illegal
13322// scalar types in order to improve codegen. Bitcast the vector to a
13323// one-element vector type whose element type is the same as the result
13324// type, and extract the first element.
13325EVT BVT =EVT::getVectorVT(*DAG.getContext(), VT, 1);
13326if (isTypeLegal(BVT)) {
13327SDValue BVec = DAG.getBitcast(BVT, Op0);
13328Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, VT, BVec,
13329 DAG.getVectorIdxConstant(0,DL)));
13330 }
13331 }
13332break;
13333 }
13334caseRISCVISD::BREV8:
13335caseRISCVISD::ORC_B: {
13336MVT VT =N->getSimpleValueType(0);
13337MVT XLenVT = Subtarget.getXLenVT();
13338assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13339"Unexpected custom legalisation");
13340assert(((N->getOpcode() ==RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13341 (N->getOpcode() ==RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13342"Unexpected extension");
13343SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND,DL, XLenVT,N->getOperand(0));
13344SDValue NewRes = DAG.getNode(N->getOpcode(),DL, XLenVT, NewOp);
13345// ReplaceNodeResults requires we maintain the same type for the return
13346// value.
13347Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, VT, NewRes));
13348break;
13349 }
13350caseISD::EXTRACT_VECTOR_ELT: {
13351// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13352// type is illegal (currently only vXi64 RV32).
13353// With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13354// transferred to the destination register. We issue two of these from the
13355// upper- and lower- halves of the SEW-bit vector element, slid down to the
13356// first element.
13357SDValue Vec =N->getOperand(0);
13358SDValueIdx =N->getOperand(1);
13359
13360// The vector type hasn't been legalized yet so we can't issue target
13361// specific nodes if it needs legalization.
13362// FIXME: We would manually legalize if it's important.
13363if (!isTypeLegal(Vec.getValueType()))
13364return;
13365
13366MVT VecVT = Vec.getSimpleValueType();
13367
13368assert(!Subtarget.is64Bit() &&N->getValueType(0) == MVT::i64 &&
13369 VecVT.getVectorElementType() == MVT::i64 &&
13370"Unexpected EXTRACT_VECTOR_ELT legalization");
13371
13372// If this is a fixed vector, we need to convert it to a scalable vector.
13373MVT ContainerVT = VecVT;
13374if (VecVT.isFixedLengthVector()) {
13375 ContainerVT =getContainerForFixedLengthVector(VecVT);
13376 Vec =convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13377 }
13378
13379MVT XLenVT = Subtarget.getXLenVT();
13380
13381// Use a VL of 1 to avoid processing more elements than we need.
13382auto [Mask, VL] =getDefaultVLOps(1, ContainerVT,DL, DAG, Subtarget);
13383
13384// Unless the index is known to be 0, we must slide the vector down to get
13385// the desired element into index 0.
13386if (!isNullConstant(Idx)) {
13387 Vec =getVSlidedown(DAG, Subtarget,DL, ContainerVT,
13388 DAG.getUNDEF(ContainerVT), Vec,Idx, Mask, VL);
13389 }
13390
13391// Extract the lower XLEN bits of the correct vector element.
13392SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S,DL, XLenVT, Vec);
13393
13394// To extract the upper XLEN bits of the vector element, shift the first
13395// element right by 32 bits and re-extract the lower XLEN bits.
13396SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, ContainerVT,
13397 DAG.getUNDEF(ContainerVT),
13398 DAG.getConstant(32,DL, XLenVT), VL);
13399SDValue LShr32 =
13400 DAG.getNode(RISCVISD::SRL_VL,DL, ContainerVT, Vec, ThirtyTwoV,
13401 DAG.getUNDEF(ContainerVT), Mask, VL);
13402
13403SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S,DL, XLenVT, LShr32);
13404
13405Results.push_back(DAG.getNode(ISD::BUILD_PAIR,DL, MVT::i64, EltLo, EltHi));
13406break;
13407 }
13408caseISD::INTRINSIC_WO_CHAIN: {
13409unsigned IntNo =N->getConstantOperandVal(0);
13410switch (IntNo) {
13411default:
13412llvm_unreachable(
13413"Don't know how to custom type legalize this intrinsic!");
13414case Intrinsic::experimental_get_vector_length: {
13415SDValue Res =lowerGetVectorLength(N, DAG, Subtarget);
13416Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13417return;
13418 }
13419case Intrinsic::experimental_cttz_elts: {
13420SDValue Res =lowerCttzElts(N, DAG, Subtarget);
13421Results.push_back(
13422 DAG.getNode(ISD::TRUNCATE,DL,N->getValueType(0), Res));
13423return;
13424 }
13425case Intrinsic::riscv_orc_b:
13426case Intrinsic::riscv_brev8:
13427case Intrinsic::riscv_sha256sig0:
13428case Intrinsic::riscv_sha256sig1:
13429case Intrinsic::riscv_sha256sum0:
13430case Intrinsic::riscv_sha256sum1:
13431case Intrinsic::riscv_sm3p0:
13432case Intrinsic::riscv_sm3p1: {
13433if (!Subtarget.is64Bit() ||N->getValueType(0) != MVT::i32)
13434return;
13435unsigned Opc;
13436switch (IntNo) {
13437case Intrinsic::riscv_orc_b: Opc =RISCVISD::ORC_B;break;
13438case Intrinsic::riscv_brev8: Opc =RISCVISD::BREV8;break;
13439case Intrinsic::riscv_sha256sig0: Opc =RISCVISD::SHA256SIG0;break;
13440case Intrinsic::riscv_sha256sig1: Opc =RISCVISD::SHA256SIG1;break;
13441case Intrinsic::riscv_sha256sum0: Opc =RISCVISD::SHA256SUM0;break;
13442case Intrinsic::riscv_sha256sum1: Opc =RISCVISD::SHA256SUM1;break;
13443case Intrinsic::riscv_sm3p0: Opc =RISCVISD::SM3P0;break;
13444case Intrinsic::riscv_sm3p1: Opc =RISCVISD::SM3P1;break;
13445 }
13446
13447SDValue NewOp =
13448 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13449SDValue Res = DAG.getNode(Opc,DL, MVT::i64, NewOp);
13450Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13451return;
13452 }
13453case Intrinsic::riscv_sm4ks:
13454case Intrinsic::riscv_sm4ed: {
13455unsigned Opc =
13456 IntNo == Intrinsic::riscv_sm4ks ?RISCVISD::SM4KS :RISCVISD::SM4ED;
13457SDValue NewOp0 =
13458 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13459SDValue NewOp1 =
13460 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(2));
13461SDValue Res =
13462 DAG.getNode(Opc,DL, MVT::i64, NewOp0, NewOp1,N->getOperand(3));
13463Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13464return;
13465 }
13466case Intrinsic::riscv_mopr: {
13467if (!Subtarget.is64Bit() ||N->getValueType(0) != MVT::i32)
13468return;
13469SDValue NewOp =
13470 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13471SDValue Res = DAG.getNode(
13472RISCVISD::MOPR,DL, MVT::i64, NewOp,
13473 DAG.getTargetConstant(N->getConstantOperandVal(2),DL, MVT::i64));
13474Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13475return;
13476 }
13477case Intrinsic::riscv_moprr: {
13478if (!Subtarget.is64Bit() ||N->getValueType(0) != MVT::i32)
13479return;
13480SDValue NewOp0 =
13481 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13482SDValue NewOp1 =
13483 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(2));
13484SDValue Res = DAG.getNode(
13485RISCVISD::MOPRR,DL, MVT::i64, NewOp0, NewOp1,
13486 DAG.getTargetConstant(N->getConstantOperandVal(3),DL, MVT::i64));
13487Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13488return;
13489 }
13490case Intrinsic::riscv_clmul: {
13491if (!Subtarget.is64Bit() ||N->getValueType(0) != MVT::i32)
13492return;
13493
13494SDValue NewOp0 =
13495 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13496SDValue NewOp1 =
13497 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(2));
13498SDValue Res = DAG.getNode(RISCVISD::CLMUL,DL, MVT::i64, NewOp0, NewOp1);
13499Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13500return;
13501 }
13502case Intrinsic::riscv_clmulh:
13503case Intrinsic::riscv_clmulr: {
13504if (!Subtarget.is64Bit() ||N->getValueType(0) != MVT::i32)
13505return;
13506
13507// Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13508// to the full 128-bit clmul result of multiplying two xlen values.
13509// Perform clmulr or clmulh on the shifted values. Finally, extract the
13510// upper 32 bits.
13511//
13512// The alternative is to mask the inputs to 32 bits and use clmul, but
13513// that requires two shifts to mask each input without zext.w.
13514// FIXME: If the inputs are known zero extended or could be freely
13515// zero extended, the mask form would be better.
13516SDValue NewOp0 =
13517 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(1));
13518SDValue NewOp1 =
13519 DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64,N->getOperand(2));
13520 NewOp0 = DAG.getNode(ISD::SHL,DL, MVT::i64, NewOp0,
13521 DAG.getConstant(32,DL, MVT::i64));
13522 NewOp1 = DAG.getNode(ISD::SHL,DL, MVT::i64, NewOp1,
13523 DAG.getConstant(32,DL, MVT::i64));
13524unsigned Opc = IntNo == Intrinsic::riscv_clmulh ?RISCVISD::CLMULH
13525 :RISCVISD::CLMULR;
13526SDValue Res = DAG.getNode(Opc,DL, MVT::i64, NewOp0, NewOp1);
13527 Res = DAG.getNode(ISD::SRL,DL, MVT::i64, Res,
13528 DAG.getConstant(32,DL, MVT::i64));
13529Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, MVT::i32, Res));
13530return;
13531 }
13532case Intrinsic::riscv_vmv_x_s: {
13533EVT VT =N->getValueType(0);
13534MVT XLenVT = Subtarget.getXLenVT();
13535if (VT.bitsLT(XLenVT)) {
13536// Simple case just extract using vmv.x.s and truncate.
13537SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S,DL,
13538 Subtarget.getXLenVT(),N->getOperand(1));
13539Results.push_back(DAG.getNode(ISD::TRUNCATE,DL, VT, Extract));
13540return;
13541 }
13542
13543assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13544"Unexpected custom legalization");
13545
13546// We need to do the move in two steps.
13547SDValue Vec =N->getOperand(1);
13548MVT VecVT = Vec.getSimpleValueType();
13549
13550// First extract the lower XLEN bits of the element.
13551SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S,DL, XLenVT, Vec);
13552
13553// To extract the upper XLEN bits of the vector element, shift the first
13554// element right by 32 bits and re-extract the lower XLEN bits.
13555auto [Mask, VL] =getDefaultVLOps(1, VecVT,DL, DAG, Subtarget);
13556
13557SDValue ThirtyTwoV =
13558 DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VecVT, DAG.getUNDEF(VecVT),
13559 DAG.getConstant(32,DL, XLenVT), VL);
13560SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL,DL, VecVT, Vec, ThirtyTwoV,
13561 DAG.getUNDEF(VecVT), Mask, VL);
13562SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S,DL, XLenVT, LShr32);
13563
13564Results.push_back(
13565 DAG.getNode(ISD::BUILD_PAIR,DL, MVT::i64, EltLo, EltHi));
13566break;
13567 }
13568 }
13569break;
13570 }
13571caseISD::VECREDUCE_ADD:
13572caseISD::VECREDUCE_AND:
13573caseISD::VECREDUCE_OR:
13574caseISD::VECREDUCE_XOR:
13575caseISD::VECREDUCE_SMAX:
13576caseISD::VECREDUCE_UMAX:
13577caseISD::VECREDUCE_SMIN:
13578caseISD::VECREDUCE_UMIN:
13579if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13580Results.push_back(V);
13581break;
13582case ISD::VP_REDUCE_ADD:
13583case ISD::VP_REDUCE_AND:
13584case ISD::VP_REDUCE_OR:
13585case ISD::VP_REDUCE_XOR:
13586case ISD::VP_REDUCE_SMAX:
13587case ISD::VP_REDUCE_UMAX:
13588case ISD::VP_REDUCE_SMIN:
13589case ISD::VP_REDUCE_UMIN:
13590if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13591Results.push_back(V);
13592break;
13593caseISD::GET_ROUNDING: {
13594SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13595SDValue Res = DAG.getNode(ISD::GET_ROUNDING,DL, VTs,N->getOperand(0));
13596Results.push_back(Res.getValue(0));
13597Results.push_back(Res.getValue(1));
13598break;
13599 }
13600 }
13601}
13602
13603/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13604/// which corresponds to it.
13605staticunsignedgetVecReduceOpcode(unsigned Opc) {
13606switch (Opc) {
13607default:
13608llvm_unreachable("Unhandled binary to transform reduction");
13609caseISD::ADD:
13610returnISD::VECREDUCE_ADD;
13611caseISD::UMAX:
13612returnISD::VECREDUCE_UMAX;
13613caseISD::SMAX:
13614returnISD::VECREDUCE_SMAX;
13615caseISD::UMIN:
13616returnISD::VECREDUCE_UMIN;
13617caseISD::SMIN:
13618returnISD::VECREDUCE_SMIN;
13619caseISD::AND:
13620returnISD::VECREDUCE_AND;
13621caseISD::OR:
13622returnISD::VECREDUCE_OR;
13623caseISD::XOR:
13624returnISD::VECREDUCE_XOR;
13625caseISD::FADD:
13626// Note: This is the associative form of the generic reduction opcode.
13627returnISD::VECREDUCE_FADD;
13628 }
13629}
13630
13631/// Perform two related transforms whose purpose is to incrementally recognize
13632/// an explode_vector followed by scalar reduction as a vector reduction node.
13633/// This exists to recover from a deficiency in SLP which can't handle
13634/// forests with multiple roots sharing common nodes. In some cases, one
13635/// of the trees will be vectorized, and the other will remain (unprofitably)
13636/// scalarized.
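/// For example, (add (extract_vector_elt V, 0), (extract_vector_elt V, 1))
/// becomes a VECREDUCE_ADD of a two-element subvector of V, and each later
/// (add (vecreduce ...), (extract_vector_elt V, N)) grows the reduced prefix
/// of V by one more element.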
13637staticSDValue
13638combineBinOpOfExtractToReduceTree(SDNode *N,SelectionDAG &DAG,
13639constRISCVSubtarget &Subtarget) {
13640
13641// This transform needs to run before all integer types have been legalized
13642// to i64 (so that the vector element type matches the add type), and while
13643// it's safe to introduce odd sized vector types.
13644if (DAG.NewNodesMustHaveLegalTypes)
13645returnSDValue();
13646
13647// Without V, this transform isn't useful. We could form the (illegal)
13648// operations and let them be scalarized again, but there's really no point.
13649if (!Subtarget.hasVInstructions())
13650returnSDValue();
13651
13652constSDLocDL(N);
13653constEVT VT =N->getValueType(0);
13654constunsigned Opc =N->getOpcode();
13655
13656// For FADD, we only handle the case with reassociation allowed. We
13657// could handle strict reduction order, but at the moment, there's no
13658// known reason to, and the complexity isn't worth it.
13659// TODO: Handle fminnum and fmaxnum here
13660if (!VT.isInteger() &&
13661 (Opc !=ISD::FADD || !N->getFlags().hasAllowReassociation()))
13662returnSDValue();
13663
13664constunsigned ReduceOpc =getVecReduceOpcode(Opc);
13665assert(Opc ==ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13666"Inconsistent mappings");
13667SDValueLHS =N->getOperand(0);
13668SDValueRHS =N->getOperand(1);
13669
13670if (!LHS.hasOneUse() || !RHS.hasOneUse())
13671returnSDValue();
13672
13673if (RHS.getOpcode() !=ISD::EXTRACT_VECTOR_ELT)
13674std::swap(LHS,RHS);
13675
13676if (RHS.getOpcode() !=ISD::EXTRACT_VECTOR_ELT ||
13677 !isa<ConstantSDNode>(RHS.getOperand(1)))
13678returnSDValue();
13679
13680uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13681SDValue SrcVec =RHS.getOperand(0);
13682EVT SrcVecVT = SrcVec.getValueType();
13683assert(SrcVecVT.getVectorElementType() == VT);
13684if (SrcVecVT.isScalableVector())
13685returnSDValue();
13686
13687if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13688returnSDValue();
13689
13690// match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13691// reduce_op (extract_subvector [2 x VT] from V). This will form the
13692// root of our reduction tree. TODO: We could extend this to any two
13693// adjacent aligned constant indices if desired.
13694if (LHS.getOpcode() ==ISD::EXTRACT_VECTOR_ELT &&
13695LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13696uint64_t LHSIdx =
13697 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13698if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13699EVT ReduceVT =EVT::getVectorVT(*DAG.getContext(), VT, 2);
13700SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ReduceVT, SrcVec,
13701 DAG.getVectorIdxConstant(0,DL));
13702return DAG.getNode(ReduceOpc,DL, VT, Vec,N->getFlags());
13703 }
13704 }
13705
13706// Match (binop (reduce (extract_subvector V, 0),
13707// (extract_vector_elt V, sizeof(SubVec))))
13708// into a reduction of one more element from the original vector V.
13709if (LHS.getOpcode() != ReduceOpc)
13710returnSDValue();
13711
13712SDValue ReduceVec =LHS.getOperand(0);
13713if (ReduceVec.getOpcode() ==ISD::EXTRACT_SUBVECTOR &&
13714 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) ==RHS.getOperand(0) &&
13715isNullConstant(ReduceVec.getOperand(1)) &&
13716 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13717// For illegal types (e.g. 3xi32), most will be combined again into a
13718// wider (hopefully legal) type. If this is a terminal state, we are
13719// relying on type legalization here to produce something reasonable
13720// and this lowering quality could probably be improved. (TODO)
13721EVT ReduceVT =EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13722SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ReduceVT, SrcVec,
13723 DAG.getVectorIdxConstant(0,DL));
13724return DAG.getNode(ReduceOpc,DL, VT, Vec,
13725 ReduceVec->getFlags() &N->getFlags());
13726 }
13727
13728returnSDValue();
13729}
13730
13731
13732// Try to fold (<bop> x, (reduction.<bop> vec, start))
13733staticSDValuecombineBinOpToReduce(SDNode *N,SelectionDAG &DAG,
13734constRISCVSubtarget &Subtarget) {
13735auto BinOpToRVVReduce = [](unsigned Opc) {
13736switch (Opc) {
13737default:
13738llvm_unreachable("Unhandled binary to transform reduction");
13739caseISD::ADD:
13740returnRISCVISD::VECREDUCE_ADD_VL;
13741caseISD::UMAX:
13742returnRISCVISD::VECREDUCE_UMAX_VL;
13743caseISD::SMAX:
13744returnRISCVISD::VECREDUCE_SMAX_VL;
13745caseISD::UMIN:
13746returnRISCVISD::VECREDUCE_UMIN_VL;
13747caseISD::SMIN:
13748returnRISCVISD::VECREDUCE_SMIN_VL;
13749caseISD::AND:
13750returnRISCVISD::VECREDUCE_AND_VL;
13751caseISD::OR:
13752returnRISCVISD::VECREDUCE_OR_VL;
13753caseISD::XOR:
13754returnRISCVISD::VECREDUCE_XOR_VL;
13755caseISD::FADD:
13756returnRISCVISD::VECREDUCE_FADD_VL;
13757caseISD::FMAXNUM:
13758returnRISCVISD::VECREDUCE_FMAX_VL;
13759caseISD::FMINNUM:
13760returnRISCVISD::VECREDUCE_FMIN_VL;
13761 }
13762 };
13763
13764auto IsReduction = [&BinOpToRVVReduce](SDValue V,unsigned Opc) {
13765return V.getOpcode() ==ISD::EXTRACT_VECTOR_ELT &&
13766isNullConstant(V.getOperand(1)) &&
13767 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13768 };
13769
13770unsigned Opc =N->getOpcode();
13771unsigned ReduceIdx;
13772if (IsReduction(N->getOperand(0), Opc))
13773 ReduceIdx = 0;
13774elseif (IsReduction(N->getOperand(1), Opc))
13775 ReduceIdx = 1;
13776else
13777returnSDValue();
13778
13779// Skip if FADD disallows reassociation but the combiner needs it.
13780if (Opc ==ISD::FADD && !N->getFlags().hasAllowReassociation())
13781returnSDValue();
13782
13783SDValue Extract =N->getOperand(ReduceIdx);
13784SDValue Reduce = Extract.getOperand(0);
13785if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13786returnSDValue();
13787
13788SDValue ScalarV = Reduce.getOperand(2);
13789EVT ScalarVT = ScalarV.getValueType();
13790if (ScalarV.getOpcode() ==ISD::INSERT_SUBVECTOR &&
13791 ScalarV.getOperand(0)->isUndef() &&
13792isNullConstant(ScalarV.getOperand(2)))
13793 ScalarV = ScalarV.getOperand(1);
13794
13795// Make sure that ScalarV is a splat with VL=1.
13796if (ScalarV.getOpcode() !=RISCVISD::VFMV_S_F_VL &&
13797 ScalarV.getOpcode() !=RISCVISD::VMV_S_X_VL &&
13798 ScalarV.getOpcode() !=RISCVISD::VMV_V_X_VL)
13799returnSDValue();
13800
13801if (!isNonZeroAVL(ScalarV.getOperand(2)))
13802returnSDValue();
13803
13804// Check that the scalar of ScalarV is the neutral element.
13805// TODO: Deal with values other than the neutral element.
13806if (!isNeutralConstant(N->getOpcode(),N->getFlags(), ScalarV.getOperand(1),
13807 0))
13808returnSDValue();
13809
13810// If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13811// FIXME: We might be able to improve this if operand 0 is undef.
13812if (!isNonZeroAVL(Reduce.getOperand(5)))
13813returnSDValue();
13814
13815SDValue NewStart =N->getOperand(1 - ReduceIdx);
13816
13817SDLocDL(N);
13818SDValue NewScalarV =
13819lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13820 ScalarV.getSimpleValueType(),DL, DAG, Subtarget);
13821
13822// If we looked through an INSERT_SUBVECTOR we need to restore it.
13823if (ScalarVT != ScalarV.getValueType())
13824 NewScalarV =
13825 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13826 NewScalarV, DAG.getVectorIdxConstant(0,DL));
13827
13828SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13829 NewScalarV, Reduce.getOperand(3),
13830 Reduce.getOperand(4), Reduce.getOperand(5)};
13831SDValue NewReduce =
13832 DAG.getNode(Reduce.getOpcode(),DL, Reduce.getValueType(), Ops);
13833return DAG.getNode(Extract.getOpcode(),DL, Extract.getValueType(), NewReduce,
13834 Extract.getOperand(1));
13835}
13836
13837// Optimize (add (shl x, c0), (shl y, c1)) ->
13838// (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
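// For example, with c0 = 1 and c1 = 3:
// (add (shl x, 1), (shl y, 3)) -> (shl (SH2ADD y, x), 1),
// since ((y << 2) + x) << 1 == (y << 3) + (x << 1).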
13839staticSDValuetransformAddShlImm(SDNode *N,SelectionDAG &DAG,
13840constRISCVSubtarget &Subtarget) {
13841// Perform this optimization only in the zba extension.
13842if (!Subtarget.hasStdExtZba())
13843returnSDValue();
13844
13845// Skip for vector types and larger types.
13846EVT VT =N->getValueType(0);
13847if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13848returnSDValue();
13849
13850// The two operand nodes must be SHL and have no other use.
13851SDValue N0 =N->getOperand(0);
13852SDValue N1 =N->getOperand(1);
13853if (N0->getOpcode() !=ISD::SHL || N1->getOpcode() !=ISD::SHL ||
13854 !N0->hasOneUse() || !N1->hasOneUse())
13855returnSDValue();
13856
13857// Check c0 and c1.
13858auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13859auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13860if (!N0C || !N1C)
13861returnSDValue();
13862 int64_t C0 = N0C->getSExtValue();
13863 int64_t C1 = N1C->getSExtValue();
13864if (C0 <= 0 || C1 <= 0)
13865returnSDValue();
13866
13867// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13868 int64_t Bits = std::min(C0, C1);
13869 int64_t Diff = std::abs(C0 - C1);
13870if (Diff != 1 && Diff != 2 && Diff != 3)
13871returnSDValue();
13872
13873// Build nodes.
13874SDLocDL(N);
13875SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13876SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13877SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD,DL, VT, NL,
13878 DAG.getConstant(Diff,DL, VT), NS);
13879return DAG.getNode(ISD::SHL,DL, VT, SHADD, DAG.getConstant(Bits,DL, VT));
13880}
13881
13882// Combine a constant select operand into its use:
13883//
13884// (and (select cond, -1, c), x)
13885// -> (select cond, x, (and x, c)) [AllOnes=1]
13886// (or (select cond, 0, c), x)
13887// -> (select cond, x, (or x, c)) [AllOnes=0]
13888// (xor (select cond, 0, c), x)
13889// -> (select cond, x, (xor x, c)) [AllOnes=0]
13890// (add (select cond, 0, c), x)
13891// -> (select cond, x, (add x, c)) [AllOnes=0]
13892// (sub x, (select cond, 0, c))
13893// -> (select cond, x, (sub x, c)) [AllOnes=0]
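// For example, (and (select cond, -1, 42), x) becomes
// (select cond, x, (and x, 42)): when the select yields the identity
// constant (-1 for AND), the binary op disappears on that arm.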
13894staticSDValuecombineSelectAndUse(SDNode *N,SDValue Slct,SDValue OtherOp,
13895SelectionDAG &DAG,boolAllOnes,
13896constRISCVSubtarget &Subtarget) {
13897EVT VT =N->getValueType(0);
13898
13899// Skip vectors.
13900if (VT.isVector())
13901returnSDValue();
13902
13903if (!Subtarget.hasConditionalMoveFusion()) {
13904// (select cond, x, (and x, c)) has custom lowering with Zicond.
13905if ((!Subtarget.hasStdExtZicond() &&
13906 !Subtarget.hasVendorXVentanaCondOps()) ||
13907N->getOpcode() !=ISD::AND)
13908returnSDValue();
13909
13910// Maybe harmful when the condition code has multiple uses.
13911if (Slct.getOpcode() ==ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13912returnSDValue();
13913
13914// Maybe harmful when VT is wider than XLen.
13915if (VT.getSizeInBits() > Subtarget.getXLen())
13916returnSDValue();
13917 }
13918
13919if ((Slct.getOpcode() !=ISD::SELECT &&
13920 Slct.getOpcode() !=RISCVISD::SELECT_CC) ||
13921 !Slct.hasOneUse())
13922returnSDValue();
13923
13924autoisZeroOrAllOnes = [](SDValueN,boolAllOnes) {
13925returnAllOnes ?isAllOnesConstant(N) :isNullConstant(N);
13926 };
13927
13928bool SwapSelectOps;
13929unsigned OpOffset = Slct.getOpcode() ==RISCVISD::SELECT_CC ? 2 : 0;
13930SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13931SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13932SDValue NonConstantVal;
13933if (isZeroOrAllOnes(TrueVal,AllOnes)) {
13934 SwapSelectOps =false;
13935 NonConstantVal = FalseVal;
13936 }elseif (isZeroOrAllOnes(FalseVal,AllOnes)) {
13937 SwapSelectOps =true;
13938 NonConstantVal = TrueVal;
13939 }else
13940returnSDValue();
13941
13942// Slct is now known to be the desired identity constant when CC is true.
13943 TrueVal = OtherOp;
13944 FalseVal = DAG.getNode(N->getOpcode(),SDLoc(N), VT, OtherOp, NonConstantVal);
13945// Unless SwapSelectOps says the condition should be false.
13946if (SwapSelectOps)
13947std::swap(TrueVal, FalseVal);
13948
13949if (Slct.getOpcode() ==RISCVISD::SELECT_CC)
13950return DAG.getNode(RISCVISD::SELECT_CC,SDLoc(N), VT,
13951 {Slct.getOperand(0), Slct.getOperand(1),
13952 Slct.getOperand(2), TrueVal, FalseVal});
13953
13954return DAG.getNode(ISD::SELECT,SDLoc(N), VT,
13955 {Slct.getOperand(0), TrueVal, FalseVal});
13956}
13957
13958// Attempt combineSelectAndUse on each operand of a commutative operator N.
13959staticSDValuecombineSelectAndUseCommutative(SDNode *N,SelectionDAG &DAG,
13960boolAllOnes,
13961constRISCVSubtarget &Subtarget) {
13962SDValue N0 =N->getOperand(0);
13963SDValue N1 =N->getOperand(1);
13964if (SDValue Result =combineSelectAndUse(N, N0, N1, DAG,AllOnes, Subtarget))
13965return Result;
13966if (SDValue Result =combineSelectAndUse(N, N1, N0, DAG,AllOnes, Subtarget))
13967return Result;
13968returnSDValue();
13969}
13970
13971// Transform (add (mul x, c0), c1) ->
13972// (add (mul (add x, c1/c0), c0), c1%c0).
13973// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13974// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13975// to an infinite loop in DAGCombine if transformed.
13976// Or transform (add (mul x, c0), c1) ->
13977// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13978// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13979// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13980// lead to an infinite loop in DAGCombine if transformed.
13981// Or transform (add (mul x, c0), c1) ->
13982// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13983// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13984// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13985// lead to an infinite loop in DAGCombine if transformed.
13986// Or transform (add (mul x, c0), c1) ->
13987// (mul (add x, c1/c0), c0).
13988// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
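// Worked example of the first form: (add (mul x, 100), 4096)
// -> (add (mul (add x, 40), 100), 96), because 4096 is not simm12 but
// 40 and 96 are, and 100 * 40 = 4000 is not simm12 either.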
13989staticSDValuetransformAddImmMulImm(SDNode *N,SelectionDAG &DAG,
13990constRISCVSubtarget &Subtarget) {
13991// Skip for vector types and larger types.
13992EVT VT =N->getValueType(0);
13993if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13994returnSDValue();
13995// The first operand node must be a MUL and has no other use.
13996SDValue N0 =N->getOperand(0);
13997if (!N0->hasOneUse() || N0->getOpcode() !=ISD::MUL)
13998returnSDValue();
13999// Check if c0 and c1 match above conditions.
14000auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14001auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14002if (!N0C || !N1C)
14003returnSDValue();
14004// If N0C has multiple uses it's possible one of the cases in
14005// DAGCombiner::isMulAddWithConstProfitable will be true, which would result
14006// in an infinite loop.
14007if (!N0C->hasOneUse())
14008returnSDValue();
14009 int64_t C0 = N0C->getSExtValue();
14010 int64_t C1 = N1C->getSExtValue();
14011 int64_t CA, CB;
14012if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14013returnSDValue();
14014// Search for proper CA (non-zero) and CB that both are simm12.
14015if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14016 !isInt<12>(C0 * (C1 / C0))) {
14017 CA = C1 / C0;
14018 CB = C1 % C0;
14019 }elseif ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14020 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14021 CA = C1 / C0 + 1;
14022 CB = C1 % C0 - C0;
14023 }elseif ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14024 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14025 CA = C1 / C0 - 1;
14026 CB = C1 % C0 + C0;
14027 }else
14028returnSDValue();
14029// Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14030SDLocDL(N);
14031SDValue New0 = DAG.getNode(ISD::ADD,DL, VT, N0->getOperand(0),
14032 DAG.getSignedConstant(CA,DL, VT));
14033SDValue New1 =
14034 DAG.getNode(ISD::MUL,DL, VT, New0, DAG.getSignedConstant(C0,DL, VT));
14035return DAG.getNode(ISD::ADD,DL, VT, New1, DAG.getSignedConstant(CB,DL, VT));
14036}
14037
14038// add (zext, zext) -> zext (add (zext, zext))
14039// sub (zext, zext) -> sext (sub (zext, zext))
14040// mul (zext, zext) -> zext (mul (zext, zext))
14041// sdiv (zext, zext) -> zext (sdiv (zext, zext))
14042// udiv (zext, zext) -> zext (udiv (zext, zext))
14043// srem (zext, zext) -> zext (srem (zext, zext))
14044// urem (zext, zext) -> zext (urem (zext, zext))
14045//
14046// where the sum of the extend widths matches, and the range of the bin op
14047// fits inside the width of the narrower bin op. (For profitability on rvv, we
14048// use a power of two for both inner and outer extend.)
14049staticSDValuecombineBinOpOfZExt(SDNode *N,SelectionDAG &DAG) {
14050
14051EVT VT =N->getValueType(0);
14052if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14053returnSDValue();
14054
14055SDValue N0 =N->getOperand(0);
14056SDValue N1 =N->getOperand(1);
14057if (N0.getOpcode() !=ISD::ZERO_EXTEND || N1.getOpcode() !=ISD::ZERO_EXTEND)
14058returnSDValue();
14059if (!N0.hasOneUse() || !N1.hasOneUse())
14060returnSDValue();
14061
14062SDValue Src0 = N0.getOperand(0);
14063SDValue Src1 = N1.getOperand(0);
14064EVT SrcVT = Src0.getValueType();
14065if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14066 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14067 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14068returnSDValue();
14069
14070LLVMContext &C = *DAG.getContext();
14071EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14072EVT NarrowVT =EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14073
14074 Src0 = DAG.getNode(ISD::ZERO_EXTEND,SDLoc(Src0), NarrowVT, Src0);
14075 Src1 = DAG.getNode(ISD::ZERO_EXTEND,SDLoc(Src1), NarrowVT, Src1);
14076
14077// Src0 and Src1 are zero extended, so they're always positive if signed.
14078//
14079// sub can produce a negative from two positive operands, so it needs to be
14080// sign extended. Other nodes produce a positive from two positive operands,
14081// so we zero extend instead.
14082unsigned OuterExtend =
14083N->getOpcode() ==ISD::SUB ?ISD::SIGN_EXTEND :ISD::ZERO_EXTEND;
14084
14085return DAG.getNode(
14086 OuterExtend,SDLoc(N), VT,
14087 DAG.getNode(N->getOpcode(),SDLoc(N), NarrowVT, Src0, Src1));
14088}
14089
14090// Try to turn (add (xor bool, 1) -1) into (neg bool).
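// For a 0/1 value b: (b ^ 1) - 1 is 0 when b == 0 and -1 when b == 1,
// which is exactly -b.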
14091staticSDValuecombineAddOfBooleanXor(SDNode *N,SelectionDAG &DAG) {
14092SDValue N0 =N->getOperand(0);
14093SDValue N1 =N->getOperand(1);
14094EVT VT =N->getValueType(0);
14095SDLocDL(N);
14096
14097// RHS should be -1.
14098if (!isAllOnesConstant(N1))
14099returnSDValue();
14100
14101// Look for (xor X, 1).
14102if (N0.getOpcode() !=ISD::XOR || !isOneConstant(N0.getOperand(1)))
14103returnSDValue();
14104
14105// First xor input should be 0 or 1.
14106APInt Mask =APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14107if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14108returnSDValue();
14109
14110// Emit a negate of the setcc.
14111return DAG.getNode(ISD::SUB,DL, VT, DAG.getConstant(0,DL, VT),
14112 N0.getOperand(0));
14113}
14114
14115staticSDValueperformADDCombine(SDNode *N,
14116TargetLowering::DAGCombinerInfo &DCI,
14117constRISCVSubtarget &Subtarget) {
14118SelectionDAG &DAG = DCI.DAG;
14119if (SDValue V =combineAddOfBooleanXor(N, DAG))
14120return V;
14121if (SDValue V =transformAddImmMulImm(N, DAG, Subtarget))
14122return V;
14123if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14124if (SDValue V =transformAddShlImm(N, DAG, Subtarget))
14125return V;
14126if (SDValue V =combineBinOpToReduce(N, DAG, Subtarget))
14127return V;
14128if (SDValue V =combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14129return V;
14130if (SDValue V =combineBinOpOfZExt(N, DAG))
14131return V;
14132
14133// fold (add (select lhs, rhs, cc, 0, y), x) ->
14134// (select lhs, rhs, cc, x, (add x, y))
14135returncombineSelectAndUseCommutative(N, DAG,/*AllOnes*/false, Subtarget);
14136}
14137
14138// Try to turn a sub boolean RHS and constant LHS into an addi.
14139staticSDValuecombineSubOfBoolean(SDNode *N,SelectionDAG &DAG) {
14140SDValue N0 =N->getOperand(0);
14141SDValue N1 =N->getOperand(1);
14142EVT VT =N->getValueType(0);
14143SDLocDL(N);
14144
14145// Require a constant LHS.
14146auto *N0C = dyn_cast<ConstantSDNode>(N0);
14147if (!N0C)
14148returnSDValue();
14149
14150// All our optimizations involve subtracting 1 from the immediate and forming
14151// an ADDI. Make sure the new immediate is valid for an ADDI.
14152APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14153if (!ImmValMinus1.isSignedIntN(12))
14154returnSDValue();
14155
14156SDValue NewLHS;
14157if (N1.getOpcode() ==ISD::SETCC && N1.hasOneUse()) {
14158// (sub constant, (setcc x, y, eq/neq)) ->
14159// (add (setcc x, y, neq/eq), constant - 1)
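// This holds because the setcc produces 0 or 1:
// C - (x == y) == (x != y) + (C - 1).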
14160ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14161EVT SetCCOpVT = N1.getOperand(0).getValueType();
14162if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14163returnSDValue();
14164 CCVal =ISD::getSetCCInverse(CCVal, SetCCOpVT);
14165 NewLHS =
14166 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14167 }elseif (N1.getOpcode() ==ISD::XOR &&isOneConstant(N1.getOperand(1)) &&
14168 N1.getOperand(0).getOpcode() ==ISD::SETCC) {
14169// (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14170// Since setcc returns a bool the xor is equivalent to 1-setcc.
14171 NewLHS = N1.getOperand(0);
14172 }else
14173returnSDValue();
14174
14175SDValue NewRHS = DAG.getConstant(ImmValMinus1,DL, VT);
14176return DAG.getNode(ISD::ADD,DL, VT, NewLHS, NewRHS);
14177}
14178
14179// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14180// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14181// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14182// valid with Y=3, while 0b0000_1000_0000_0100 is not.
14183staticSDValuecombineSubShiftToOrcB(SDNode *N,SelectionDAG &DAG,
14184constRISCVSubtarget &Subtarget) {
14185if (!Subtarget.hasStdExtZbb())
14186returnSDValue();
14187
14188EVT VT =N->getValueType(0);
14189
14190if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14191returnSDValue();
14192
14193SDValue N0 =N->getOperand(0);
14194SDValue N1 =N->getOperand(1);
14195
14196if (N0->getOpcode() !=ISD::SHL)
14197returnSDValue();
14198
14199auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14200if (!ShAmtCLeft)
14201returnSDValue();
14202unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14203
14204if (ShiftedAmount >= 8)
14205returnSDValue();
14206
14207SDValue LeftShiftOperand = N0->getOperand(0);
14208SDValue RightShiftOperand = N1;
14209
14210if (ShiftedAmount != 0) {// Right operand must be a right shift.
14211if (N1->getOpcode() !=ISD::SRL)
14212returnSDValue();
14213auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14214if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14215returnSDValue();
14216 RightShiftOperand = N1.getOperand(0);
14217 }
14218
14219// At least one shift should have a single use.
14220if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14221returnSDValue();
14222
14223if (LeftShiftOperand != RightShiftOperand)
14224returnSDValue();
14225
14226APInt Mask =APInt::getSplat(VT.getSizeInBits(),APInt(8, 0x1));
14227 Mask <<= ShiftedAmount;
14228// Check that X has indeed the right shape (only the Y-th bit can be set in
14229// every byte).
14230if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14231returnSDValue();
14232
14233return DAG.getNode(RISCVISD::ORC_B,SDLoc(N), VT, LeftShiftOperand);
14234}
14235
14236staticSDValueperformSUBCombine(SDNode *N,SelectionDAG &DAG,
14237constRISCVSubtarget &Subtarget) {
14238if (SDValue V =combineSubOfBoolean(N, DAG))
14239return V;
14240
14241EVT VT =N->getValueType(0);
14242SDValue N0 =N->getOperand(0);
14243SDValue N1 =N->getOperand(1);
14244// fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14245if (isNullConstant(N0) && N1.getOpcode() ==ISD::SETCC && N1.hasOneUse() &&
14246isNullConstant(N1.getOperand(1))) {
14247ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14248if (CCVal ==ISD::SETLT) {
14249SDLocDL(N);
14250unsigned ShAmt = N0.getValueSizeInBits() - 1;
14251return DAG.getNode(ISD::SRA,DL, VT, N1.getOperand(0),
14252 DAG.getConstant(ShAmt,DL, VT));
14253 }
14254 }
14255
14256if (SDValue V =combineBinOpOfZExt(N, DAG))
14257return V;
14258if (SDValue V =combineSubShiftToOrcB(N, DAG, Subtarget))
14259return V;
14260
14261// fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14262// (select lhs, rhs, cc, x, (sub x, y))
14263returncombineSelectAndUse(N, N1, N0, DAG,/*AllOnes*/false, Subtarget);
14264}
14265
14266// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14267// Legalizing setcc can introduce xors like this. Doing this transform reduces
14268// the number of xors and may allow the xor to fold into a branch condition.
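// For example, with X and Y known to be 0/1:
// (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
// (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)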
14269staticSDValuecombineDeMorganOfBoolean(SDNode *N,SelectionDAG &DAG) {
14270SDValue N0 =N->getOperand(0);
14271SDValue N1 =N->getOperand(1);
14272bool IsAnd =N->getOpcode() ==ISD::AND;
14273
14274if (N0.getOpcode() !=ISD::XOR || N1.getOpcode() !=ISD::XOR)
14275returnSDValue();
14276
14277if (!N0.hasOneUse() || !N1.hasOneUse())
14278returnSDValue();
14279
14280SDValue N01 = N0.getOperand(1);
14281SDValue N11 = N1.getOperand(1);
14282
14283// For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14284// (xor X, -1) based on the upper bits of the other operand being 0. If the
14285// operation is And, allow one of the Xors to use -1.
14286if (isOneConstant(N01)) {
14287if (!isOneConstant(N11) && !(IsAnd &&isAllOnesConstant(N11)))
14288returnSDValue();
14289 }elseif (isOneConstant(N11)) {
14290// N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14291if (!(IsAnd &&isAllOnesConstant(N01)))
14292returnSDValue();
14293 }else
14294returnSDValue();
14295
14296EVT VT =N->getValueType(0);
14297
14298SDValue N00 = N0.getOperand(0);
14299SDValue N10 = N1.getOperand(0);
14300
14301// The LHS of the xors needs to be 0/1.
14302APInt Mask =APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14303if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14304returnSDValue();
14305
14306// Invert the opcode and insert a new xor.
14307SDLocDL(N);
14308unsigned Opc = IsAnd ?ISD::OR :ISD::AND;
14309SDValue Logic = DAG.getNode(Opc,DL, VT, N00, N10);
14310return DAG.getNode(ISD::XOR,DL, VT, Logic, DAG.getConstant(1,DL, VT));
14311}
14312
14313// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14314// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14315// value to an unsigned value. This will be lowered to vmax and a series of
14316// vnclipu instructions later. This can be extended to other truncated types
14317// other than i8 by replacing 256 and 255 with the equivalent constants for the
14318// type.
14319staticSDValuecombineTruncSelectToSMaxUSat(SDNode *N,SelectionDAG &DAG) {
14320EVT VT =N->getValueType(0);
14321SDValue N0 =N->getOperand(0);
14322EVT SrcVT = N0.getValueType();
14323
14324constTargetLowering &TLI = DAG.getTargetLoweringInfo();
14325if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14326returnSDValue();
14327
14328if (N0.getOpcode() !=ISD::VSELECT || !N0.hasOneUse())
14329returnSDValue();
14330
14331SDValueCond = N0.getOperand(0);
14332SDValue True = N0.getOperand(1);
14333SDValue False = N0.getOperand(2);
14334
14335if (Cond.getOpcode() !=ISD::SETCC)
14336returnSDValue();
14337
14338// FIXME: Support the version of this pattern with the select operands
14339// swapped.
14340ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14341if (CCVal !=ISD::SETULT)
14342returnSDValue();
14343
14344SDValue CondLHS =Cond.getOperand(0);
14345SDValue CondRHS =Cond.getOperand(1);
14346
14347if (CondLHS != True)
14348returnSDValue();
14349
14350unsigned ScalarBits = VT.getScalarSizeInBits();
14351
14352// FIXME: Support other constants.
14353ConstantSDNode *CondRHSC =isConstOrConstSplat(CondRHS);
14354if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14355returnSDValue();
14356
14357if (False.getOpcode() !=ISD::SIGN_EXTEND)
14358returnSDValue();
14359
14360 False = False.getOperand(0);
14361
14362if (False.getOpcode() !=ISD::SETCC || False.getOperand(0) != True)
14363returnSDValue();
14364
14365ConstantSDNode *FalseRHSC =isConstOrConstSplat(False.getOperand(1));
14366if (!FalseRHSC || !FalseRHSC->isZero())
14367returnSDValue();
14368
14369ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14370if (CCVal2 !=ISD::SETGT)
14371returnSDValue();
14372
14373// Emit the signed to unsigned saturation pattern.
14374SDLocDL(N);
14375SDValue Max =
14376 DAG.getNode(ISD::SMAX,DL, SrcVT, True, DAG.getConstant(0,DL, SrcVT));
14377SDValue Min =
14378 DAG.getNode(ISD::SMIN,DL, SrcVT, Max,
14379 DAG.getConstant((1ULL << ScalarBits) - 1,DL, SrcVT));
14380return DAG.getNode(ISD::TRUNCATE,DL, VT, Min);
14381}
14382
14383staticSDValueperformTRUNCATECombine(SDNode *N,SelectionDAG &DAG,
14384constRISCVSubtarget &Subtarget) {
14385SDValue N0 =N->getOperand(0);
14386EVT VT =N->getValueType(0);
14387
14388// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14389// extending X. This is safe since we only need the LSB after the shift and
14390// shift amounts larger than 31 would produce poison. If we wait until
14391// type legalization, we'll create RISCVISD::SRLW and we can't recover it
14392// to use a BEXT instruction.
14393if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14394 N0.getValueType() == MVT::i32 && N0.getOpcode() ==ISD::SRL &&
14395 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14396SDLocDL(N0);
14397SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64, N0.getOperand(0));
14398SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND,DL, MVT::i64, N0.getOperand(1));
14399SDValue Srl = DAG.getNode(ISD::SRL,DL, MVT::i64, Op0, Op1);
14400return DAG.getNode(ISD::TRUNCATE,SDLoc(N), VT, Srl);
14401 }
14402
14403returncombineTruncSelectToSMaxUSat(N, DAG);
14404}
14405
14406// Combines two comparison operations and a logic operation into one selection
14407// operation (min, max) and a logic operation. Returns the newly constructed node
14408// if the conditions for optimization are satisfied.
14409staticSDValueperformANDCombine(SDNode *N,
14410TargetLowering::DAGCombinerInfo &DCI,
14411constRISCVSubtarget &Subtarget) {
14412SelectionDAG &DAG = DCI.DAG;
14413
14414SDValue N0 =N->getOperand(0);
14415// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14416// extending X. This is safe since we only need the LSB after the shift and
14417// shift amounts larger than 31 would produce poison. If we wait until
14418// type legalization, we'll create RISCVISD::SRLW and we can't recover it
14419// to use a BEXT instruction.
14420if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14421N->getValueType(0) == MVT::i32 &&isOneConstant(N->getOperand(1)) &&
14422 N0.getOpcode() ==ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14423 N0.hasOneUse()) {
14424SDLocDL(N);
14425SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64, N0.getOperand(0));
14426SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND,DL, MVT::i64, N0.getOperand(1));
14427SDValue Srl = DAG.getNode(ISD::SRL,DL, MVT::i64, Op0, Op1);
14428SDValueAnd = DAG.getNode(ISD::AND,DL, MVT::i64, Srl,
14429 DAG.getConstant(1,DL, MVT::i64));
14430return DAG.getNode(ISD::TRUNCATE,DL, MVT::i32,And);
14431 }
14432
14433if (SDValue V =combineBinOpToReduce(N, DAG, Subtarget))
14434return V;
14435if (SDValue V =combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14436return V;
14437
14438if (DCI.isAfterLegalizeDAG())
14439if (SDValue V =combineDeMorganOfBoolean(N, DAG))
14440return V;
14441
14442// fold (and (select lhs, rhs, cc, -1, y), x) ->
14443// (select lhs, rhs, cc, x, (and x, y))
14444returncombineSelectAndUseCommutative(N, DAG,/*AllOnes*/true, Subtarget);
14445}
14446
14447// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14448// FIXME: Generalize to other binary operators with same operand.
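// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)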
14449staticSDValuecombineOrOfCZERO(SDNode *N,SDValue N0,SDValue N1,
14450SelectionDAG &DAG) {
14451assert(N->getOpcode() ==ISD::OR &&"Unexpected opcode");
14452
14453if (N0.getOpcode() !=RISCVISD::CZERO_EQZ ||
14454 N1.getOpcode() !=RISCVISD::CZERO_NEZ ||
14455 !N0.hasOneUse() || !N1.hasOneUse())
14456returnSDValue();
14457
14458// Should have the same condition.
14459SDValueCond = N0.getOperand(1);
14460if (Cond != N1.getOperand(1))
14461returnSDValue();
14462
14463SDValue TrueV = N0.getOperand(0);
14464SDValue FalseV = N1.getOperand(0);
14465
14466if (TrueV.getOpcode() !=ISD::XOR || FalseV.getOpcode() !=ISD::XOR ||
14467 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14468 !isOneConstant(TrueV.getOperand(1)) ||
14469 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14470returnSDValue();
14471
14472EVT VT =N->getValueType(0);
14473SDLocDL(N);
14474
14475SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ,DL, VT, TrueV.getOperand(0),
14476Cond);
14477SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ,DL, VT, FalseV.getOperand(0),
14478Cond);
14479SDValue NewOr = DAG.getNode(ISD::OR,DL, VT, NewN0, NewN1);
14480return DAG.getNode(ISD::XOR,DL, VT, NewOr, TrueV.getOperand(1));
14481}
14482
14483staticSDValueperformORCombine(SDNode *N,TargetLowering::DAGCombinerInfo &DCI,
14484constRISCVSubtarget &Subtarget) {
14485SelectionDAG &DAG = DCI.DAG;
14486
14487if (SDValue V =combineBinOpToReduce(N, DAG, Subtarget))
14488return V;
14489if (SDValue V =combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14490return V;
14491
14492if (DCI.isAfterLegalizeDAG())
14493if (SDValue V =combineDeMorganOfBoolean(N, DAG))
14494return V;
14495
14496// Look for an Or of CZERO_EQZ/NEZ with the same condition, which is the select idiom.
14497// We may be able to pull a common operation out of the true and false values.
14498SDValue N0 =N->getOperand(0);
14499SDValue N1 =N->getOperand(1);
14500if (SDValue V =combineOrOfCZERO(N, N0, N1, DAG))
14501return V;
14502if (SDValue V =combineOrOfCZERO(N, N1, N0, DAG))
14503return V;
14504
14505// fold (or (select cond, 0, y), x) ->
14506// (select cond, x, (or x, y))
14507returncombineSelectAndUseCommutative(N, DAG,/*AllOnes*/false, Subtarget);
14508}
14509
14510staticSDValueperformXORCombine(SDNode *N,SelectionDAG &DAG,
14511constRISCVSubtarget &Subtarget) {
14512SDValue N0 =N->getOperand(0);
14513SDValue N1 =N->getOperand(1);
14514
14515// Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14516// (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14517// RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14518if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14519N->getValueType(0) == MVT::i32 &&isAllOnesConstant(N1) &&
14520 N0.getOpcode() ==ISD::SHL &&isAllOnesConstant(N0.getOperand(0)) &&
14521 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14522SDLocDL(N);
14523SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i64, N0.getOperand(0));
14524SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND,DL, MVT::i64, N0.getOperand(1));
14525SDValue Shl = DAG.getNode(ISD::SHL,DL, MVT::i64, Op0, Op1);
14526SDValueAnd = DAG.getNOT(DL, Shl, MVT::i64);
14527return DAG.getNode(ISD::TRUNCATE,DL, MVT::i32,And);
14528 }
14529
14530// fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14531// NOTE: Assumes ROL being legal means ROLW is legal.
14532constTargetLowering &TLI = DAG.getTargetLoweringInfo();
14533if (N0.getOpcode() ==RISCVISD::SLLW &&
14534isAllOnesConstant(N1) &&isOneConstant(N0.getOperand(0)) &&
14535 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14536SDLocDL(N);
14537return DAG.getNode(RISCVISD::ROLW,DL, MVT::i64,
14538 DAG.getConstant(~1,DL, MVT::i64), N0.getOperand(1));
14539 }
14540
14541// Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
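// For example, (xor (setlt 5, y), 1) computes !(5 < y), i.e. (y < 6), so it
// becomes (setlt y, 6).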
14542if (N0.getOpcode() ==ISD::SETCC &&isOneConstant(N1) && N0.hasOneUse()) {
14543auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14544ISD::CondCodeCC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14545if (ConstN00 &&CC ==ISD::SETLT) {
14546EVT VT = N0.getValueType();
14547SDLocDL(N0);
14548constAPInt &Imm = ConstN00->getAPIntValue();
14549if ((Imm + 1).isSignedIntN(12))
14550return DAG.getSetCC(DL, VT, N0.getOperand(1),
14551 DAG.getConstant(Imm + 1,DL, VT),CC);
14552 }
14553 }
14554
14555if (SDValue V =combineBinOpToReduce(N, DAG, Subtarget))
14556return V;
14557if (SDValue V =combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14558return V;
14559
14560// fold (xor (select cond, 0, y), x) ->
14561// (select cond, x, (xor x, y))
14562returncombineSelectAndUseCommutative(N, DAG,/*AllOnes*/false, Subtarget);
14563}
14564
14565// Try to expand a scalar multiply to a faster sequence.
14566staticSDValueexpandMul(SDNode *N,SelectionDAG &DAG,
14567TargetLowering::DAGCombinerInfo &DCI,
14568constRISCVSubtarget &Subtarget) {
14569
14570EVT VT =N->getValueType(0);
14571
14572// LI + MUL is usually smaller than the alternative sequence.
14573if (DAG.getMachineFunction().getFunction().hasMinSize())
14574returnSDValue();
14575
14576if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14577returnSDValue();
14578
14579if (VT != Subtarget.getXLenVT())
14580returnSDValue();
14581
14582constbool HasShlAdd =
14583 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14584
14585ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14586if (!CNode)
14587returnSDValue();
14588uint64_t MulAmt = CNode->getZExtValue();
14589
14590// WARNING: The code below is knowingly incorrect with regard to undef semantics.
14591// We're adding additional uses of X here, and in principle, we should be freezing
14592// X before doing so. However, adding freeze here causes real regressions, and no
14593// other target properly freezes X in these cases either.
14594SDValueX =N->getOperand(0);
14595
14596if (HasShlAdd) {
14597for (uint64_t Divisor : {3, 5, 9}) {
14598if (MulAmt % Divisor != 0)
14599continue;
14600uint64_t MulAmt2 = MulAmt / Divisor;
14601// 3/5/9 * 2^N -> shl (shXadd X, X), N
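// e.g. MulAmt = 40 = 5 * 8 -> (shl (sh2add X, X), 3)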
14602if (isPowerOf2_64(MulAmt2)) {
14603SDLocDL(N);
14604SDValueX =N->getOperand(0);
14605// Put the shift first if we can fold a zext into the
14606// shift forming a slli.uw.
14607if (X.getOpcode() ==ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14608X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14609SDValue Shl = DAG.getNode(ISD::SHL,DL, VT,X,
14610 DAG.getConstant(Log2_64(MulAmt2),DL, VT));
14611return DAG.getNode(RISCVISD::SHL_ADD,DL, VT, Shl,
14612 DAG.getConstant(Log2_64(Divisor - 1),DL, VT),
14613 Shl);
14614 }
14615// Otherwise, put the shl second so that it can fold with following
14616// instructions (e.g. sext or add).
14617SDValue Mul359 =
14618 DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14619 DAG.getConstant(Log2_64(Divisor - 1),DL, VT),X);
14620return DAG.getNode(ISD::SHL,DL, VT, Mul359,
14621 DAG.getConstant(Log2_64(MulAmt2),DL, VT));
14622 }
14623
14624// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14625if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14626SDLocDL(N);
14627SDValue Mul359 =
14628 DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14629 DAG.getConstant(Log2_64(Divisor - 1),DL, VT),X);
14630return DAG.getNode(RISCVISD::SHL_ADD,DL, VT, Mul359,
14631 DAG.getConstant(Log2_64(MulAmt2 - 1),DL, VT),
14632 Mul359);
14633 }
14634 }
14635
14636// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
14637// shXadd. First check if this is a sum of two powers of 2 because that's
14638// easy. Then count how many zeros are up to the first bit.
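// e.g. MulAmt = 34 = 32 + 2 -> (sh1add X, (shl X, 5))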
14639if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14640unsigned ScaleShift =llvm::countr_zero(MulAmt);
14641if (ScaleShift >= 1 && ScaleShift < 4) {
14642unsigned ShiftAmt =Log2_64((MulAmt & (MulAmt - 1)));
14643SDLocDL(N);
14644SDValue Shift1 =
14645 DAG.getNode(ISD::SHL,DL, VT,X, DAG.getConstant(ShiftAmt,DL, VT));
14646return DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14647 DAG.getConstant(ScaleShift,DL, VT), Shift1);
14648 }
14649 }
14650
14651// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14652// This is the two-instruction form; there are also three-instruction
14653// variants we could implement, e.g.
14654// (2^(1,2,3) * 3,5,9 + 1) << C2
14655// 2^(C1>3) * 3,5,9 +/- 1
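// e.g. MulAmt = 11 = 2 * 5 + 1 -> (sh1add (sh2add X, X), X)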
14656for (uint64_t Divisor : {3, 5, 9}) {
14657uint64_tC = MulAmt - 1;
14658if (C <= Divisor)
14659continue;
14660unsigned TZ =llvm::countr_zero(C);
14661if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14662SDLocDL(N);
14663SDValue Mul359 =
14664 DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14665 DAG.getConstant(Log2_64(Divisor - 1),DL, VT),X);
14666return DAG.getNode(RISCVISD::SHL_ADD,DL, VT, Mul359,
14667 DAG.getConstant(TZ,DL, VT),X);
14668 }
14669 }
14670
14671// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
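// e.g. MulAmt = 35 = 32 + 2 + 1 -> (add (shl X, 5), (sh1add X, X))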
14672if (MulAmt > 2 &&isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14673unsigned ScaleShift =llvm::countr_zero(MulAmt - 1);
14674if (ScaleShift >= 1 && ScaleShift < 4) {
14675unsigned ShiftAmt =Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14676SDLocDL(N);
14677SDValue Shift1 =
14678 DAG.getNode(ISD::SHL,DL, VT,X, DAG.getConstant(ShiftAmt,DL, VT));
14679return DAG.getNode(ISD::ADD,DL, VT, Shift1,
14680 DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14681 DAG.getConstant(ScaleShift,DL, VT),X));
14682 }
14683 }
14684
14685// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
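// e.g. MulAmt = 29 = 32 - 3 -> (sub (shl X, 5), (sh1add X, X))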
14686for (uint64_tOffset : {3, 5, 9}) {
14687if (isPowerOf2_64(MulAmt +Offset)) {
14688SDLocDL(N);
14689SDValue Shift1 =
14690 DAG.getNode(ISD::SHL,DL, VT,X,
14691 DAG.getConstant(Log2_64(MulAmt +Offset),DL, VT));
14692SDValue Mul359 =
14693 DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14694 DAG.getConstant(Log2_64(Offset - 1),DL, VT),X);
14695return DAG.getNode(ISD::SUB,DL, VT, Shift1, Mul359);
14696 }
14697 }
14698 }
14699
14700// 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
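// e.g. MulAmt = 56 = 64 - 8 -> (sub (shl X, 6), (shl X, 3))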
14701uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14702if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14703uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14704SDLocDL(N);
14705SDValue Shift1 = DAG.getNode(ISD::SHL,DL, VT,N->getOperand(0),
14706 DAG.getConstant(Log2_64(ShiftAmt1),DL, VT));
14707SDValue Shift2 =
14708 DAG.getNode(ISD::SHL,DL, VT,N->getOperand(0),
14709 DAG.getConstant(Log2_64(MulAmtLowBit),DL, VT));
14710return DAG.getNode(ISD::SUB,DL, VT, Shift1, Shift2);
14711 }
14712
14713if (HasShlAdd) {
14714for (uint64_t Divisor : {3, 5, 9}) {
14715if (MulAmt % Divisor != 0)
14716continue;
14717uint64_t MulAmt2 = MulAmt / Divisor;
14718// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14719// of 25 which happen to be quite common.
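// e.g. MulAmt = 200 = 5 * 5 * 8
//   -> (shl (sh2add (sh2add X, X), (sh2add X, X)), 3)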
14720for (uint64_t Divisor2 : {3, 5, 9}) {
14721if (MulAmt2 % Divisor2 != 0)
14722continue;
14723uint64_t MulAmt3 = MulAmt2 / Divisor2;
14724if (isPowerOf2_64(MulAmt3)) {
14725SDLocDL(N);
14726SDValue Mul359A =
14727 DAG.getNode(RISCVISD::SHL_ADD,DL, VT,X,
14728 DAG.getConstant(Log2_64(Divisor - 1),DL, VT),X);
14729SDValue Mul359B = DAG.getNode(
14730RISCVISD::SHL_ADD,DL, VT, Mul359A,
14731 DAG.getConstant(Log2_64(Divisor2 - 1),DL, VT), Mul359A);
14732return DAG.getNode(ISD::SHL,DL, VT, Mul359B,
14733 DAG.getConstant(Log2_64(MulAmt3),DL, VT));
14734 }
14735 }
14736 }
14737 }
14738
14739returnSDValue();
14740}
14741
14742// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14743// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14744// Same for other equivalent types with other equivalent constants.
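// E.g. vXi64 (mul (and (lshr X, 31), 0x100000001), 0xffffffff) ->
// (bitcast (sra (v2Xi32 (bitcast X)), 31))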
14745staticSDValuecombineVectorMulToSraBitcast(SDNode *N,SelectionDAG &DAG) {
14746EVT VT =N->getValueType(0);
14747constTargetLowering &TLI = DAG.getTargetLoweringInfo();
14748
14749// Do this for legal vectors unless they are i1 or i8 vectors.
14750if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14751returnSDValue();
14752
14753if (N->getOperand(0).getOpcode() !=ISD::AND ||
14754N->getOperand(0).getOperand(0).getOpcode() !=ISD::SRL)
14755returnSDValue();
14756
14757SDValueAnd =N->getOperand(0);
14758SDValue Srl =And.getOperand(0);
14759
14760APInt V1, V2, V3;
14761if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14762 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14763 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14764returnSDValue();
14765
14766unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14767if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14768 V3 != (HalfSize - 1))
14769returnSDValue();
14770
14771EVT HalfVT =EVT::getVectorVT(*DAG.getContext(),
14772EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14773 VT.getVectorElementCount() * 2);
14774SDLocDL(N);
14775SDValue Cast = DAG.getNode(ISD::BITCAST,DL, HalfVT, Srl.getOperand(0));
14776SDValue Sra = DAG.getNode(ISD::SRA,DL, HalfVT, Cast,
14777 DAG.getConstant(HalfSize - 1,DL, HalfVT));
14778return DAG.getNode(ISD::BITCAST,DL, VT, Sra);
14779}
14780
14781staticSDValueperformMULCombine(SDNode *N,SelectionDAG &DAG,
14782TargetLowering::DAGCombinerInfo &DCI,
14783constRISCVSubtarget &Subtarget) {
14784EVT VT =N->getValueType(0);
14785if (!VT.isVector())
14786returnexpandMul(N, DAG, DCI, Subtarget);
14787
14788SDLocDL(N);
14789SDValue N0 =N->getOperand(0);
14790SDValue N1 =N->getOperand(1);
14791SDValue MulOper;
14792unsigned AddSubOpc;
14793
14794// vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14795// (mul x, add (y, 1)) -> (add x, (mul x, y))
14796// vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14797// (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14798auto IsAddSubWith1 = [&](SDValue V) ->bool {
14799 AddSubOpc = V->getOpcode();
14800if ((AddSubOpc ==ISD::ADD || AddSubOpc ==ISD::SUB) && V->hasOneUse()) {
14801SDValue Opnd = V->getOperand(1);
14802 MulOper = V->getOperand(0);
14803if (AddSubOpc ==ISD::SUB)
14804std::swap(Opnd, MulOper);
14805if (isOneOrOneSplat(Opnd))
14806returntrue;
14807 }
14808returnfalse;
14809 };
14810
14811if (IsAddSubWith1(N0)) {
14812SDValue MulVal = DAG.getNode(ISD::MUL,DL, VT, N1, MulOper);
14813return DAG.getNode(AddSubOpc,DL, VT, N1, MulVal);
14814 }
14815
14816if (IsAddSubWith1(N1)) {
14817SDValue MulVal = DAG.getNode(ISD::MUL,DL, VT, N0, MulOper);
14818return DAG.getNode(AddSubOpc,DL, VT, N0, MulVal);
14819 }
14820
14821if (SDValue V =combineBinOpOfZExt(N, DAG))
14822return V;
14823
14824if (SDValue V =combineVectorMulToSraBitcast(N, DAG))
14825return V;
14826
14827returnSDValue();
14828}
14829
14830/// According to the property that indexed load/store instructions zero-extend
14831/// their indices, try to narrow the type of the index operand.
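/// For example, an index of (shl (zext X:vXi8 to vXi64), 2) only needs
/// 8 + 2 = 10 bits, so it can be narrowed to (shl (zext X to vXi16), 2).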
14832staticboolnarrowIndex(SDValue &N,ISD::MemIndexType IndexType,SelectionDAG &DAG) {
14833if (isIndexTypeSigned(IndexType))
14834returnfalse;
14835
14836if (!N->hasOneUse())
14837returnfalse;
14838
14839EVT VT =N.getValueType();
14840SDLocDL(N);
14841
14842// In general, what we're doing here is seeing if we can sink a truncate to
14843// a smaller element type into the expression tree building our index.
14844// TODO: We can generalize this and handle a bunch more cases if useful.
14845
14846// Narrow a buildvector to the narrowest element type. This requires less
14847// work and less register pressure at high LMUL, and creates smaller constants
14848// which may be cheaper to materialize.
14849if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14850KnownBits Known = DAG.computeKnownBits(N);
14851unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14852LLVMContext &C = *DAG.getContext();
14853EVT ResultVT =EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14854if (ResultVT.bitsLT(VT.getVectorElementType())) {
14855N = DAG.getNode(ISD::TRUNCATE,DL,
14856 VT.changeVectorElementType(ResultVT),N);
14857returntrue;
14858 }
14859 }
14860
14861// Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14862if (N.getOpcode() !=ISD::SHL)
14863returnfalse;
14864
14865SDValue N0 =N.getOperand(0);
14866if (N0.getOpcode() !=ISD::ZERO_EXTEND &&
14867 N0.getOpcode() !=RISCVISD::VZEXT_VL)
14868returnfalse;
14869if (!N0->hasOneUse())
14870returnfalse;
14871
14872APInt ShAmt;
14873SDValue N1 =N.getOperand(1);
14874if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14875returnfalse;
14876
14877SDValue Src = N0.getOperand(0);
14878EVT SrcVT = Src.getValueType();
14879unsigned SrcElen = SrcVT.getScalarSizeInBits();
14880unsigned ShAmtV = ShAmt.getZExtValue();
14881unsigned NewElen =PowerOf2Ceil(SrcElen + ShAmtV);
14882 NewElen = std::max(NewElen, 8U);
14883
14884// Skip if NewElen is not narrower than the original extended type.
14885if (NewElen >= N0.getValueType().getScalarSizeInBits())
14886returnfalse;
14887
14888EVT NewEltVT =EVT::getIntegerVT(*DAG.getContext(), NewElen);
14889EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14890
14891SDValue NewExt = DAG.getNode(N0->getOpcode(),DL, NewVT, N0->ops());
14892SDValue NewShAmtVec = DAG.getConstant(ShAmtV,DL, NewVT);
14893N = DAG.getNode(ISD::SHL,DL, NewVT, NewExt, NewShAmtVec);
14894returntrue;
14895}
14896
14897// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14898// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14899// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14900// can become a sext.w instead of a shift pair.
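// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000).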
14901staticSDValueperformSETCCCombine(SDNode *N,SelectionDAG &DAG,
14902constRISCVSubtarget &Subtarget) {
14903SDValue N0 =N->getOperand(0);
14904SDValue N1 =N->getOperand(1);
14905EVT VT =N->getValueType(0);
14906EVT OpVT = N0.getValueType();
14907
14908if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14909returnSDValue();
14910
14911// RHS needs to be a constant.
14912auto *N1C = dyn_cast<ConstantSDNode>(N1);
14913if (!N1C)
14914returnSDValue();
14915
14916// LHS needs to be (and X, 0xffffffff).
14917if (N0.getOpcode() !=ISD::AND || !N0.hasOneUse() ||
14918 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14919 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14920returnSDValue();
14921
14922// Looking for an equality compare.
14923ISD::CondCodeCond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14924if (!isIntEqualitySetCC(Cond))
14925returnSDValue();
14926
14927// Don't do this if the sign bit is provably zero, as it will be turned back into
14928// an AND.
14929APInt SignMask =APInt::getOneBitSet(64, 31);
14930if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14931returnSDValue();
14932
14933constAPInt &C1 = N1C->getAPIntValue();
14934
14935SDLoc dl(N);
14936// If the constant is larger than 2^32 - 1 it is impossible for both sides
14937// to be equal.
14938if (C1.getActiveBits() > 32)
14939return DAG.getBoolConstant(Cond ==ISD::SETNE, dl, VT, OpVT);
14940
14941SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG,N, OpVT,
14942 N0.getOperand(0), DAG.getValueType(MVT::i32));
14943return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14944 dl, OpVT),Cond);
14945}
14946
14947staticSDValue
14948performSIGN_EXTEND_INREGCombine(SDNode *N,SelectionDAG &DAG,
14949constRISCVSubtarget &Subtarget) {
14950SDValue Src =N->getOperand(0);
14951EVT VT =N->getValueType(0);
14952EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14953unsigned Opc = Src.getOpcode();
14954
14955// Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14956// Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14957if (Opc ==RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14958 Subtarget.hasStdExtZfhmin())
14959return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH,SDLoc(N), VT,
14960 Src.getOperand(0));
14961
14962// Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14963if (Opc ==ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14964 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14965 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14966return DAG.getNode(RISCVISD::SLLW,SDLoc(N), VT, Src.getOperand(0),
14967 Src.getOperand(1));
14968
14969returnSDValue();
14970}
14971
14972namespace{
14973// Forward declaration of the structure holding the necessary information to
14974// apply a combine.
14975structCombineResult;
14976
14977enum ExtKind :uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14978/// Helper class for folding sign/zero extensions.
14979/// In particular, this class is used for the following combines:
14980/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14981/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14982/// mul | mul_vl -> vwmul(u) | vwmul_su
14983/// shl | shl_vl -> vwsll
14984/// fadd -> vfwadd | vfwadd_w
14985/// fsub -> vfwsub | vfwsub_w
14986/// fmul -> vfwmul
14987/// An object of this class represents an operand of the operation we want to
14988/// combine.
14989/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14990/// NodeExtensionHelper for `a` and one for `b`.
14991///
14992/// This class abstracts away how the extension is materialized and
14993/// how its number of users affect the combines.
14994///
14995/// In particular:
14996/// - VWADD_W is conceptually == add(op0, sext(op1))
14997/// - VWADDU_W == add(op0, zext(op1))
14998/// - VWSUB_W == sub(op0, sext(op1))
14999/// - VWSUBU_W == sub(op0, zext(op1))
15000/// - VFWADD_W == fadd(op0, fpext(op1))
15001/// - VFWSUB_W == fsub(op0, fpext(op1))
15002/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15003/// zext|sext(smaller_value).
15004structNodeExtensionHelper {
15005 /// Records if this operand is like being zero extended.
15006bool SupportsZExt;
15007 /// Records if this operand is like being sign extended.
15008 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
 15009 /// instance, a splat constant (e.g., 3) would support being both sign and
15010 /// zero extended.
15011bool SupportsSExt;
 15012 /// Records if this operand is like being floating-point extended.
15013bool SupportsFPExt;
15014 /// This boolean captures whether we care if this operand would still be
15015 /// around after the folding happens.
15016bool EnforceOneUse;
15017 /// Original value that this NodeExtensionHelper represents.
15018SDValue OrigOperand;
15019
15020 /// Get the value feeding the extension or the value itself.
15021 /// E.g., for zext(a), this would return a.
15022SDValue getSource() const{
15023switch (OrigOperand.getOpcode()) {
15024caseISD::ZERO_EXTEND:
15025caseISD::SIGN_EXTEND:
15026caseRISCVISD::VSEXT_VL:
15027caseRISCVISD::VZEXT_VL:
15028caseRISCVISD::FP_EXTEND_VL:
15029return OrigOperand.getOperand(0);
15030default:
15031return OrigOperand;
15032 }
15033 }
15034
15035 /// Check if this instance represents a splat.
15036boolisSplat() const{
15037return OrigOperand.getOpcode() ==RISCVISD::VMV_V_X_VL ||
15038 OrigOperand.getOpcode() ==ISD::SPLAT_VECTOR;
15039 }
15040
15041 /// Get the extended opcode.
15042unsigned getExtOpc(ExtKind SupportsExt) const{
15043switch (SupportsExt) {
15044case ExtKind::SExt:
15045returnRISCVISD::VSEXT_VL;
15046case ExtKind::ZExt:
15047returnRISCVISD::VZEXT_VL;
15048case ExtKind::FPExt:
15049returnRISCVISD::FP_EXTEND_VL;
15050 }
15051llvm_unreachable("Unknown ExtKind enum");
15052 }
15053
15054 /// Get or create a value that can feed \p Root with the given extension \p
 15055 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
15056 /// operand. \see ::getSource().
15057SDValue getOrCreateExtendedOp(SDNode *Root,SelectionDAG &DAG,
15058constRISCVSubtarget &Subtarget,
15059 std::optional<ExtKind> SupportsExt) const{
15060if (!SupportsExt.has_value())
15061return OrigOperand;
15062
15063MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15064
15065SDValueSource = getSource();
15066assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15067if (Source.getValueType() == NarrowVT)
15068returnSource;
15069
15070// vfmadd_vl -> vfwmadd_vl can take bf16 operands
15071if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15072assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15073 Root->getOpcode() ==RISCVISD::VFMADD_VL);
15074returnSource;
15075 }
15076
15077unsigned ExtOpc = getExtOpc(*SupportsExt);
15078
15079// If we need an extension, we should be changing the type.
15080SDLocDL(OrigOperand);
15081auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15082switch (OrigOperand.getOpcode()) {
15083caseISD::ZERO_EXTEND:
15084caseISD::SIGN_EXTEND:
15085caseRISCVISD::VSEXT_VL:
15086caseRISCVISD::VZEXT_VL:
15087caseRISCVISD::FP_EXTEND_VL:
15088return DAG.getNode(ExtOpc,DL, NarrowVT, Source, Mask, VL);
15089caseISD::SPLAT_VECTOR:
15090return DAG.getSplat(NarrowVT,DL,Source.getOperand(0));
15091caseRISCVISD::VMV_V_X_VL:
15092return DAG.getNode(RISCVISD::VMV_V_X_VL,DL, NarrowVT,
15093 DAG.getUNDEF(NarrowVT),Source.getOperand(1), VL);
15094caseRISCVISD::VFMV_V_F_VL:
15095Source =Source.getOperand(1);
15096assert(Source.getOpcode() ==ISD::FP_EXTEND &&"Unexpected source");
15097Source =Source.getOperand(0);
15098assert(Source.getValueType() == NarrowVT.getVectorElementType());
15099return DAG.getNode(RISCVISD::VFMV_V_F_VL,DL, NarrowVT,
15100 DAG.getUNDEF(NarrowVT), Source, VL);
15101default:
15102// Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15103// and that operand should already have the right NarrowVT so no
15104// extension should be required at this point.
15105llvm_unreachable("Unsupported opcode");
15106 }
15107 }
15108
15109 /// Helper function to get the narrow type for \p Root.
15110 /// The narrow type is the type of \p Root where we divided the size of each
15111 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15112 /// \pre Both the narrow type and the original type should be legal.
15113staticMVT getNarrowType(constSDNode *Root, ExtKind SupportsExt) {
15114MVT VT = Root->getSimpleValueType(0);
15115
15116// Determine the narrow size.
15117unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15118
15119MVT EltVT = SupportsExt == ExtKind::FPExt
15120 ?MVT::getFloatingPointVT(NarrowSize)
15121 :MVT::getIntegerVT(NarrowSize);
15122
15123assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15124"Trying to extend something we can't represent");
15125MVT NarrowVT =MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15126return NarrowVT;
15127 }
15128
15129 /// Get the opcode to materialize:
15130 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15131staticunsigned getSExtOpcode(unsigned Opcode) {
15132switch (Opcode) {
15133caseISD::ADD:
15134caseRISCVISD::ADD_VL:
15135caseRISCVISD::VWADD_W_VL:
15136caseRISCVISD::VWADDU_W_VL:
15137caseISD::OR:
15138returnRISCVISD::VWADD_VL;
15139caseISD::SUB:
15140caseRISCVISD::SUB_VL:
15141caseRISCVISD::VWSUB_W_VL:
15142caseRISCVISD::VWSUBU_W_VL:
15143returnRISCVISD::VWSUB_VL;
15144caseISD::MUL:
15145caseRISCVISD::MUL_VL:
15146returnRISCVISD::VWMUL_VL;
15147default:
15148llvm_unreachable("Unexpected opcode");
15149 }
15150 }
15151
15152 /// Get the opcode to materialize:
15153 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15154staticunsigned getZExtOpcode(unsigned Opcode) {
15155switch (Opcode) {
15156caseISD::ADD:
15157caseRISCVISD::ADD_VL:
15158caseRISCVISD::VWADD_W_VL:
15159caseRISCVISD::VWADDU_W_VL:
15160caseISD::OR:
15161returnRISCVISD::VWADDU_VL;
15162caseISD::SUB:
15163caseRISCVISD::SUB_VL:
15164caseRISCVISD::VWSUB_W_VL:
15165caseRISCVISD::VWSUBU_W_VL:
15166returnRISCVISD::VWSUBU_VL;
15167caseISD::MUL:
15168caseRISCVISD::MUL_VL:
15169returnRISCVISD::VWMULU_VL;
15170caseISD::SHL:
15171caseRISCVISD::SHL_VL:
15172returnRISCVISD::VWSLL_VL;
15173default:
15174llvm_unreachable("Unexpected opcode");
15175 }
15176 }
15177
15178 /// Get the opcode to materialize:
15179 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15180staticunsigned getFPExtOpcode(unsigned Opcode) {
15181switch (Opcode) {
15182caseRISCVISD::FADD_VL:
15183caseRISCVISD::VFWADD_W_VL:
15184returnRISCVISD::VFWADD_VL;
15185caseRISCVISD::FSUB_VL:
15186caseRISCVISD::VFWSUB_W_VL:
15187returnRISCVISD::VFWSUB_VL;
15188caseRISCVISD::FMUL_VL:
15189returnRISCVISD::VFWMUL_VL;
15190caseRISCVISD::VFMADD_VL:
15191returnRISCVISD::VFWMADD_VL;
15192caseRISCVISD::VFMSUB_VL:
15193returnRISCVISD::VFWMSUB_VL;
15194caseRISCVISD::VFNMADD_VL:
15195returnRISCVISD::VFWNMADD_VL;
15196caseRISCVISD::VFNMSUB_VL:
15197returnRISCVISD::VFWNMSUB_VL;
15198default:
15199llvm_unreachable("Unexpected opcode");
15200 }
15201 }
15202
15203 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15204 /// newOpcode(a, b).
15205staticunsigned getSUOpcode(unsigned Opcode) {
15206assert((Opcode ==RISCVISD::MUL_VL || Opcode ==ISD::MUL) &&
15207"SU is only supported for MUL");
15208returnRISCVISD::VWMULSU_VL;
15209 }
15210
15211 /// Get the opcode to materialize
15212 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15213staticunsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15214switch (Opcode) {
15215caseISD::ADD:
15216caseRISCVISD::ADD_VL:
15217caseISD::OR:
15218return SupportsExt == ExtKind::SExt ?RISCVISD::VWADD_W_VL
15219 :RISCVISD::VWADDU_W_VL;
15220caseISD::SUB:
15221caseRISCVISD::SUB_VL:
15222return SupportsExt == ExtKind::SExt ?RISCVISD::VWSUB_W_VL
15223 :RISCVISD::VWSUBU_W_VL;
15224caseRISCVISD::FADD_VL:
15225returnRISCVISD::VFWADD_W_VL;
15226caseRISCVISD::FSUB_VL:
15227returnRISCVISD::VFWSUB_W_VL;
15228default:
15229llvm_unreachable("Unexpected opcode");
15230 }
15231 }
15232
15233usingCombineToTry = std::function<std::optional<CombineResult>(
15234SDNode */*Root*/,const NodeExtensionHelper &/*LHS*/,
15235const NodeExtensionHelper &/*RHS*/,SelectionDAG &,
15236constRISCVSubtarget &)>;
15237
15238 /// Check if this node needs to be fully folded or extended for all users.
15239bool needToPromoteOtherUsers() const{return EnforceOneUse; }
15240
15241void fillUpExtensionSupportForSplat(SDNode *Root,SelectionDAG &DAG,
15242constRISCVSubtarget &Subtarget) {
15243unsigned Opc = OrigOperand.getOpcode();
15244MVT VT = OrigOperand.getSimpleValueType();
15245
15246assert((Opc ==ISD::SPLAT_VECTOR || Opc ==RISCVISD::VMV_V_X_VL) &&
15247"Unexpected Opcode");
15248
15249// The passthru must be undef for tail agnostic.
15250if (Opc ==RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15251return;
15252
15253// Get the scalar value.
15254SDValueOp = Opc ==ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15255 : OrigOperand.getOperand(1);
15256
15257// See if we have enough sign bits or zero bits in the scalar to use a
15258// widening opcode by splatting to smaller element size.
15259unsigned EltBits = VT.getScalarSizeInBits();
15260unsigned ScalarBits =Op.getValueSizeInBits();
15261// If we're not getting all bits from the element, we need special handling.
15262if (ScalarBits < EltBits) {
15263// This should only occur on RV32.
15264assert(Opc ==RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15265 !Subtarget.is64Bit() &&"Unexpected splat");
15266// vmv.v.x sign extends narrow inputs.
15267 SupportsSExt =true;
15268
15269// If the input is positive, then sign extend is also zero extend.
15270if (DAG.SignBitIsZero(Op))
15271 SupportsZExt =true;
15272
15273 EnforceOneUse =false;
15274return;
15275 }
15276
15277unsigned NarrowSize = EltBits / 2;
15278// If the narrow type cannot be expressed with a legal VMV,
15279// this is not a valid candidate.
15280if (NarrowSize < 8)
15281return;
15282
15283if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15284 SupportsSExt =true;
15285
15286if (DAG.MaskedValueIsZero(Op,
15287APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15288 SupportsZExt =true;
15289
15290 EnforceOneUse =false;
15291 }
15292
15293bool isSupportedFPExtend(SDNode *Root,MVT NarrowEltVT,
15294constRISCVSubtarget &Subtarget) {
15295// Any f16 extension will need zvfh
15296if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15297returnfalse;
15298// The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15299// zvfbfwma
15300if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15301 Root->getOpcode() !=RISCVISD::VFMADD_VL))
15302returnfalse;
15303returntrue;
15304 }
15305
15306 /// Helper method to set the various fields of this struct based on the
15307 /// type of \p Root.
15308void fillUpExtensionSupport(SDNode *Root,SelectionDAG &DAG,
15309constRISCVSubtarget &Subtarget) {
15310 SupportsZExt =false;
15311 SupportsSExt =false;
15312 SupportsFPExt =false;
15313 EnforceOneUse =true;
15314unsigned Opc = OrigOperand.getOpcode();
15315// For the nodes we handle below, we end up using their inputs directly: see
15316// getSource(). However, since they either don't have a passthru or we check
15317// that their passthru is undef, we can safely ignore their mask and VL.
15318switch (Opc) {
15319caseISD::ZERO_EXTEND:
15320caseISD::SIGN_EXTEND: {
15321MVT VT = OrigOperand.getSimpleValueType();
15322if (!VT.isVector())
15323break;
15324
15325SDValue NarrowElt = OrigOperand.getOperand(0);
15326MVT NarrowVT = NarrowElt.getSimpleValueType();
15327// i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15328if (NarrowVT.getVectorElementType() == MVT::i1)
15329break;
15330
15331 SupportsZExt = Opc ==ISD::ZERO_EXTEND;
15332 SupportsSExt = Opc ==ISD::SIGN_EXTEND;
15333break;
15334 }
15335caseRISCVISD::VZEXT_VL:
15336 SupportsZExt =true;
15337break;
15338caseRISCVISD::VSEXT_VL:
15339 SupportsSExt =true;
15340break;
15341caseRISCVISD::FP_EXTEND_VL: {
15342MVT NarrowEltVT =
15343 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15344if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15345break;
15346 SupportsFPExt =true;
15347break;
15348 }
15349caseISD::SPLAT_VECTOR:
15350caseRISCVISD::VMV_V_X_VL:
15351 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15352break;
15353caseRISCVISD::VFMV_V_F_VL: {
15354MVT VT = OrigOperand.getSimpleValueType();
15355
15356if (!OrigOperand.getOperand(0).isUndef())
15357break;
15358
15359SDValueOp = OrigOperand.getOperand(1);
15360if (Op.getOpcode() !=ISD::FP_EXTEND)
15361break;
15362
15363if (!isSupportedFPExtend(Root,Op.getOperand(0).getSimpleValueType(),
15364 Subtarget))
15365break;
15366
15367unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15368unsigned ScalarBits =Op.getOperand(0).getValueSizeInBits();
15369if (NarrowSize != ScalarBits)
15370break;
15371
15372 SupportsFPExt =true;
15373break;
15374 }
15375default:
15376break;
15377 }
15378 }
15379
15380 /// Check if \p Root supports any extension folding combines.
15381staticbool isSupportedRoot(constSDNode *Root,
15382constRISCVSubtarget &Subtarget) {
15383switch (Root->getOpcode()) {
15384caseISD::ADD:
15385caseISD::SUB:
15386caseISD::MUL: {
15387return Root->getValueType(0).isScalableVector();
15388 }
15389caseISD::OR: {
15390return Root->getValueType(0).isScalableVector() &&
15391 Root->getFlags().hasDisjoint();
15392 }
15393// Vector Widening Integer Add/Sub/Mul Instructions
15394caseRISCVISD::ADD_VL:
15395caseRISCVISD::MUL_VL:
15396caseRISCVISD::VWADD_W_VL:
15397caseRISCVISD::VWADDU_W_VL:
15398caseRISCVISD::SUB_VL:
15399caseRISCVISD::VWSUB_W_VL:
15400caseRISCVISD::VWSUBU_W_VL:
15401// Vector Widening Floating-Point Add/Sub/Mul Instructions
15402caseRISCVISD::FADD_VL:
15403caseRISCVISD::FSUB_VL:
15404caseRISCVISD::FMUL_VL:
15405caseRISCVISD::VFWADD_W_VL:
15406caseRISCVISD::VFWSUB_W_VL:
15407returntrue;
15408caseISD::SHL:
15409return Root->getValueType(0).isScalableVector() &&
15410 Subtarget.hasStdExtZvbb();
15411caseRISCVISD::SHL_VL:
15412return Subtarget.hasStdExtZvbb();
15413caseRISCVISD::VFMADD_VL:
15414caseRISCVISD::VFNMSUB_VL:
15415caseRISCVISD::VFNMADD_VL:
15416caseRISCVISD::VFMSUB_VL:
15417returntrue;
15418default:
15419returnfalse;
15420 }
15421 }
15422
15423 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15424 NodeExtensionHelper(SDNode *Root,unsigned OperandIdx,SelectionDAG &DAG,
15425constRISCVSubtarget &Subtarget) {
15426assert(isSupportedRoot(Root, Subtarget) &&
15427"Trying to build an helper with an "
15428"unsupported root");
15429assert(OperandIdx < 2 &&"Requesting something else than LHS or RHS");
15430assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
15431 OrigOperand = Root->getOperand(OperandIdx);
15432
15433unsigned Opc = Root->getOpcode();
15434switch (Opc) {
15435// We consider
15436// VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15437// VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15438// VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15439caseRISCVISD::VWADD_W_VL:
15440caseRISCVISD::VWADDU_W_VL:
15441caseRISCVISD::VWSUB_W_VL:
15442caseRISCVISD::VWSUBU_W_VL:
15443caseRISCVISD::VFWADD_W_VL:
15444caseRISCVISD::VFWSUB_W_VL:
15445if (OperandIdx == 1) {
15446 SupportsZExt =
15447 Opc ==RISCVISD::VWADDU_W_VL || Opc ==RISCVISD::VWSUBU_W_VL;
15448 SupportsSExt =
15449 Opc ==RISCVISD::VWADD_W_VL || Opc ==RISCVISD::VWSUB_W_VL;
15450 SupportsFPExt =
15451 Opc ==RISCVISD::VFWADD_W_VL || Opc ==RISCVISD::VFWSUB_W_VL;
15452// There's no existing extension here, so we don't have to worry about
15453// making sure it gets removed.
15454 EnforceOneUse =false;
15455break;
15456 }
15457 [[fallthrough]];
15458default:
15459 fillUpExtensionSupport(Root, DAG, Subtarget);
15460break;
15461 }
15462 }
15463
15464 /// Helper function to get the Mask and VL from \p Root.
15465static std::pair<SDValue, SDValue>
15466 getMaskAndVL(constSDNode *Root,SelectionDAG &DAG,
15467constRISCVSubtarget &Subtarget) {
15468assert(isSupportedRoot(Root, Subtarget) &&"Unexpected root");
15469switch (Root->getOpcode()) {
15470caseISD::ADD:
15471caseISD::SUB:
15472caseISD::MUL:
15473caseISD::OR:
15474caseISD::SHL: {
15475SDLocDL(Root);
15476MVT VT = Root->getSimpleValueType(0);
15477returngetDefaultScalableVLOps(VT,DL, DAG, Subtarget);
15478 }
15479default:
15480return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15481 }
15482 }
15483
15484 /// Helper function to check if \p N is commutative with respect to the
15485 /// foldings that are supported by this class.
15486staticboolisCommutative(constSDNode *N) {
15487switch (N->getOpcode()) {
15488caseISD::ADD:
15489caseISD::MUL:
15490caseISD::OR:
15491caseRISCVISD::ADD_VL:
15492caseRISCVISD::MUL_VL:
15493caseRISCVISD::VWADD_W_VL:
15494caseRISCVISD::VWADDU_W_VL:
15495caseRISCVISD::FADD_VL:
15496caseRISCVISD::FMUL_VL:
15497caseRISCVISD::VFWADD_W_VL:
15498caseRISCVISD::VFMADD_VL:
15499caseRISCVISD::VFNMSUB_VL:
15500caseRISCVISD::VFNMADD_VL:
15501caseRISCVISD::VFMSUB_VL:
15502returntrue;
15503caseISD::SUB:
15504caseRISCVISD::SUB_VL:
15505caseRISCVISD::VWSUB_W_VL:
15506caseRISCVISD::VWSUBU_W_VL:
15507caseRISCVISD::FSUB_VL:
15508caseRISCVISD::VFWSUB_W_VL:
15509caseISD::SHL:
15510caseRISCVISD::SHL_VL:
15511returnfalse;
15512default:
15513llvm_unreachable("Unexpected opcode");
15514 }
15515 }
15516
 15517 /// Get a list of combines to try for folding extensions in \p Root.
 15518 /// Note that each returned CombineToTry function doesn't actually modify
 15519 /// anything. Instead, they produce an optional CombineResult that, if not None,
 15520 /// needs to be materialized for the combine to be applied.
15521 /// \see CombineResult::materialize.
15522 /// If the related CombineToTry function returns std::nullopt, that means the
15523 /// combine didn't match.
15524staticSmallVector<CombineToTry> getSupportedFoldings(constSDNode *Root);
15525};
15526
15527/// Helper structure that holds all the necessary information to materialize a
15528/// combine that does some extension folding.
15529structCombineResult {
15530 /// Opcode to be generated when materializing the combine.
15531unsigned TargetOpcode;
15532// No value means no extension is needed.
15533 std::optional<ExtKind> LHSExt;
15534 std::optional<ExtKind> RHSExt;
15535 /// Root of the combine.
15536SDNode *Root;
15537 /// LHS of the TargetOpcode.
15538 NodeExtensionHelperLHS;
15539 /// RHS of the TargetOpcode.
15540 NodeExtensionHelperRHS;
15541
15542 CombineResult(unsigned TargetOpcode,SDNode *Root,
15543const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15544const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15545 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15546LHS(LHS),RHS(RHS) {}
15547
15548 /// Return a value that uses TargetOpcode and that can be used to replace
15549 /// Root.
15550 /// The actual replacement is *not* done in that method.
15551SDValue materialize(SelectionDAG &DAG,
15552constRISCVSubtarget &Subtarget) const{
15553SDValueMask, VL, Passthru;
15554 std::tie(Mask, VL) =
15555 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15556switch (Root->getOpcode()) {
15557default:
15558 Passthru = Root->getOperand(2);
15559break;
15560caseISD::ADD:
15561caseISD::SUB:
15562caseISD::MUL:
15563caseISD::OR:
15564caseISD::SHL:
15565 Passthru = DAG.getUNDEF(Root->getValueType(0));
15566break;
15567 }
15568return DAG.getNode(TargetOpcode,SDLoc(Root), Root->getValueType(0),
15569LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15570RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15571 Passthru, Mask, VL);
15572 }
15573};
15574
15575/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15576/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15577/// are zext) and LHS and RHS can be folded into Root.
15578/// AllowExtMask defines which form `ext` can take in this pattern.
15579///
15580/// \note If the pattern can match with both zext and sext, the returned
15581/// CombineResult will feature the zext result.
15582///
15583/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15584/// can be used to apply the pattern.
15585static std::optional<CombineResult>
15586canFoldToVWWithSameExtensionImpl(SDNode *Root,const NodeExtensionHelper &LHS,
15587const NodeExtensionHelper &RHS,
15588uint8_t AllowExtMask,SelectionDAG &DAG,
15589constRISCVSubtarget &Subtarget) {
15590if ((AllowExtMask & ExtKind::ZExt) &&LHS.SupportsZExt &&RHS.SupportsZExt)
15591return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15592 Root, LHS,/*LHSExt=*/{ExtKind::ZExt}, RHS,
15593/*RHSExt=*/{ExtKind::ZExt});
15594if ((AllowExtMask & ExtKind::SExt) &&LHS.SupportsSExt &&RHS.SupportsSExt)
15595return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15596 Root, LHS,/*LHSExt=*/{ExtKind::SExt}, RHS,
15597/*RHSExt=*/{ExtKind::SExt});
15598if ((AllowExtMask & ExtKind::FPExt) &&LHS.SupportsFPExt &&RHS.SupportsFPExt)
15599return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15600 Root, LHS,/*LHSExt=*/{ExtKind::FPExt}, RHS,
15601/*RHSExt=*/{ExtKind::FPExt});
15602return std::nullopt;
15603}
15604
15605/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15606/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15607/// are zext) and LHS and RHS can be folded into Root.
15608///
15609/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15610/// can be used to apply the pattern.
15611static std::optional<CombineResult>
15612canFoldToVWWithSameExtension(SDNode *Root,const NodeExtensionHelper &LHS,
15613const NodeExtensionHelper &RHS,SelectionDAG &DAG,
15614constRISCVSubtarget &Subtarget) {
15615return canFoldToVWWithSameExtensionImpl(
15616 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15617 Subtarget);
15618}
15619
15620/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15621///
15622/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15623/// can be used to apply the pattern.
15624static std::optional<CombineResult>
15625canFoldToVW_W(SDNode *Root,const NodeExtensionHelper &LHS,
15626const NodeExtensionHelper &RHS,SelectionDAG &DAG,
15627constRISCVSubtarget &Subtarget) {
15628if (RHS.SupportsFPExt)
15629return CombineResult(
15630 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15631 Root, LHS,/*LHSExt=*/std::nullopt, RHS,/*RHSExt=*/{ExtKind::FPExt});
15632
15633// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15634// sext/zext?
15635// Control this behavior behind an option (AllowSplatInVW_W) for testing
15636// purposes.
15637if (RHS.SupportsZExt && (!RHS.isSplat() ||AllowSplatInVW_W))
15638return CombineResult(
15639 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15640 LHS,/*LHSExt=*/std::nullopt, RHS,/*RHSExt=*/{ExtKind::ZExt});
15641if (RHS.SupportsSExt && (!RHS.isSplat() ||AllowSplatInVW_W))
15642return CombineResult(
15643 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15644 LHS,/*LHSExt=*/std::nullopt, RHS,/*RHSExt=*/{ExtKind::SExt});
15645return std::nullopt;
15646}
15647
15648/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15649///
15650/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15651/// can be used to apply the pattern.
15652static std::optional<CombineResult>
15653canFoldToVWWithSEXT(SDNode *Root,const NodeExtensionHelper &LHS,
15654const NodeExtensionHelper &RHS,SelectionDAG &DAG,
15655constRISCVSubtarget &Subtarget) {
15656return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15657 Subtarget);
15658}
15659
15660/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15661///
15662/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15663/// can be used to apply the pattern.
15664static std::optional<CombineResult>
15665canFoldToVWWithZEXT(SDNode *Root,const NodeExtensionHelper &LHS,
15666const NodeExtensionHelper &RHS,SelectionDAG &DAG,
15667constRISCVSubtarget &Subtarget) {
15668return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15669 Subtarget);
15670}
15671
15672/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15673///
15674/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15675/// can be used to apply the pattern.
15676static std::optional<CombineResult>
15677canFoldToVWWithFPEXT(SDNode *Root,const NodeExtensionHelper &LHS,
15678const NodeExtensionHelper &RHS,SelectionDAG &DAG,
15679constRISCVSubtarget &Subtarget) {
15680return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15681 Subtarget);
15682}
15683
15684/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15685///
15686/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15687/// can be used to apply the pattern.
15688static std::optional<CombineResult>
15689canFoldToVW_SU(SDNode *Root,const NodeExtensionHelper &LHS,
15690const NodeExtensionHelper &RHS,SelectionDAG &DAG,
15691constRISCVSubtarget &Subtarget) {
15692
15693if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15694return std::nullopt;
15695return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15696 Root, LHS,/*LHSExt=*/{ExtKind::SExt}, RHS,
15697/*RHSExt=*/{ExtKind::ZExt});
15698}
15699
15700SmallVector<NodeExtensionHelper::CombineToTry>
15701NodeExtensionHelper::getSupportedFoldings(constSDNode *Root) {
15702SmallVector<CombineToTry> Strategies;
15703switch (Root->getOpcode()) {
15704caseISD::ADD:
15705caseISD::SUB:
15706caseISD::OR:
15707caseRISCVISD::ADD_VL:
15708caseRISCVISD::SUB_VL:
15709caseRISCVISD::FADD_VL:
15710caseRISCVISD::FSUB_VL:
15711// add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15712 Strategies.push_back(canFoldToVWWithSameExtension);
15713// add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15714 Strategies.push_back(canFoldToVW_W);
15715break;
15716caseRISCVISD::FMUL_VL:
15717caseRISCVISD::VFMADD_VL:
15718caseRISCVISD::VFMSUB_VL:
15719caseRISCVISD::VFNMADD_VL:
15720caseRISCVISD::VFNMSUB_VL:
15721 Strategies.push_back(canFoldToVWWithSameExtension);
15722break;
15723caseISD::MUL:
15724caseRISCVISD::MUL_VL:
15725// mul -> vwmul(u)
15726 Strategies.push_back(canFoldToVWWithSameExtension);
15727// mul -> vwmulsu
15728 Strategies.push_back(canFoldToVW_SU);
15729break;
15730caseISD::SHL:
15731caseRISCVISD::SHL_VL:
15732// shl -> vwsll
15733 Strategies.push_back(canFoldToVWWithZEXT);
15734break;
15735caseRISCVISD::VWADD_W_VL:
15736caseRISCVISD::VWSUB_W_VL:
15737// vwadd_w|vwsub_w -> vwadd|vwsub
15738 Strategies.push_back(canFoldToVWWithSEXT);
15739break;
15740caseRISCVISD::VWADDU_W_VL:
15741caseRISCVISD::VWSUBU_W_VL:
15742// vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15743 Strategies.push_back(canFoldToVWWithZEXT);
15744break;
15745caseRISCVISD::VFWADD_W_VL:
15746caseRISCVISD::VFWSUB_W_VL:
15747// vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15748 Strategies.push_back(canFoldToVWWithFPEXT);
15749break;
15750default:
15751llvm_unreachable("Unexpected opcode");
15752 }
15753return Strategies;
15754}
15755}// End anonymous namespace.
15756
15757/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15758/// The supported combines are:
15759/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15760/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15761/// mul | mul_vl -> vwmul(u) | vwmul_su
15762/// shl | shl_vl -> vwsll
15763/// fadd_vl -> vfwadd | vfwadd_w
15764/// fsub_vl -> vfwsub | vfwsub_w
15765/// fmul_vl -> vfwmul
15766/// vwadd_w(u) -> vwadd(u)
15767/// vwsub_w(u) -> vwsub(u)
15768/// vfwadd_w -> vfwadd
15769/// vfwsub_w -> vfwsub
15770staticSDValuecombineOp_VLToVWOp_VL(SDNode *N,
15771TargetLowering::DAGCombinerInfo &DCI,
15772constRISCVSubtarget &Subtarget) {
15773SelectionDAG &DAG = DCI.DAG;
15774if (DCI.isBeforeLegalize())
15775returnSDValue();
15776
15777if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15778returnSDValue();
15779
15780SmallVector<SDNode *> Worklist;
15781SmallSet<SDNode *, 8> Inserted;
15782 Worklist.push_back(N);
15783 Inserted.insert(N);
15784SmallVector<CombineResult> CombinesToApply;
15785
15786while (!Worklist.empty()) {
15787SDNode *Root = Worklist.pop_back_val();
15788
15789 NodeExtensionHelperLHS(Root, 0, DAG, Subtarget);
15790 NodeExtensionHelperRHS(Root, 1, DAG, Subtarget);
15791auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15792 &Inserted](const NodeExtensionHelper &Op) {
15793if (Op.needToPromoteOtherUsers()) {
15794for (SDUse &Use :Op.OrigOperand->uses()) {
15795SDNode *TheUser =Use.getUser();
15796if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15797returnfalse;
15798// We only support the first 2 operands of FMA.
15799if (Use.getOperandNo() >= 2)
15800returnfalse;
15801if (Inserted.insert(TheUser).second)
15802 Worklist.push_back(TheUser);
15803 }
15804 }
15805returntrue;
15806 };
15807
15808// Control the compile time by limiting the number of nodes we look at in
15809// total.
15810if (Inserted.size() >ExtensionMaxWebSize)
15811returnSDValue();
15812
15813SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15814 NodeExtensionHelper::getSupportedFoldings(Root);
15815
15816assert(!FoldingStrategies.empty() &&"Nothing to be folded");
15817bool Matched =false;
15818for (int Attempt = 0;
15819 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15820 ++Attempt) {
15821
15822for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15823 FoldingStrategies) {
15824 std::optional<CombineResult> Res =
15825 FoldingStrategy(Root,LHS,RHS, DAG, Subtarget);
15826if (Res) {
15827 Matched =true;
15828 CombinesToApply.push_back(*Res);
15829// All the inputs that are extended need to be folded, otherwise
15830// we would be leaving the old input (since it may still be used),
15831// and the new one.
15832if (Res->LHSExt.has_value())
15833if (!AppendUsersIfNeeded(LHS))
15834returnSDValue();
15835if (Res->RHSExt.has_value())
15836if (!AppendUsersIfNeeded(RHS))
15837returnSDValue();
15838break;
15839 }
15840 }
15841std::swap(LHS,RHS);
15842 }
15843// Right now we do an all or nothing approach.
15844if (!Matched)
15845returnSDValue();
15846 }
15847// Store the value for the replacement of the input node separately.
15848SDValue InputRootReplacement;
15849// We do the RAUW after we materialize all the combines, because some replaced
15850// nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15851// some of these nodes may appear in the NodeExtensionHelpers of some of the
15852// yet-to-be-visited CombinesToApply roots.
15853SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15854 ValuesToReplace.reserve(CombinesToApply.size());
15855for (CombineResult Res : CombinesToApply) {
15856SDValue NewValue = Res.materialize(DAG, Subtarget);
15857if (!InputRootReplacement) {
15858assert(Res.Root ==N &&
15859"First element is expected to be the current node");
15860 InputRootReplacement = NewValue;
15861 }else {
15862 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15863 }
15864 }
15865for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15866 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15867 DCI.AddToWorklist(OldNewValues.second.getNode());
15868 }
15869return InputRootReplacement;
15870}
15871
15872// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15873// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15874// y will be the Passthru and cond will be the Mask.
15875staticSDValuecombineVWADDSUBWSelect(SDNode *N,SelectionDAG &DAG) {
15876unsigned Opc =N->getOpcode();
15877assert(Opc ==RISCVISD::VWADD_W_VL || Opc ==RISCVISD::VWADDU_W_VL ||
15878 Opc ==RISCVISD::VWSUB_W_VL || Opc ==RISCVISD::VWSUBU_W_VL);
15879
15880SDValueY =N->getOperand(0);
15881SDValue MergeOp =N->getOperand(1);
15882unsigned MergeOpc = MergeOp.getOpcode();
15883
15884if (MergeOpc !=RISCVISD::VMERGE_VL && MergeOpc !=ISD::VSELECT)
15885returnSDValue();
15886
15887SDValueX = MergeOp->getOperand(1);
15888
15889if (!MergeOp.hasOneUse())
15890returnSDValue();
15891
15892// Passthru should be undef
15893SDValue Passthru =N->getOperand(2);
15894if (!Passthru.isUndef())
15895returnSDValue();
15896
15897// Mask should be all ones
15898SDValue Mask =N->getOperand(3);
15899if (Mask.getOpcode() !=RISCVISD::VMSET_VL)
15900returnSDValue();
15901
15902// False value of MergeOp should be all zeros
15903SDValue Z = MergeOp->getOperand(2);
15904
15905if (Z.getOpcode() ==ISD::INSERT_SUBVECTOR &&
15906 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15907 Z = Z.getOperand(1);
15908
15909if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15910returnSDValue();
15911
15912return DAG.getNode(Opc,SDLoc(N),N->getValueType(0),
15913 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15914N->getFlags());
15915}
15916
15917staticSDValueperformVWADDSUBW_VLCombine(SDNode *N,
15918TargetLowering::DAGCombinerInfo &DCI,
15919constRISCVSubtarget &Subtarget) {
15920 [[maybe_unused]]unsigned Opc =N->getOpcode();
15921assert(Opc ==RISCVISD::VWADD_W_VL || Opc ==RISCVISD::VWADDU_W_VL ||
15922 Opc ==RISCVISD::VWSUB_W_VL || Opc ==RISCVISD::VWSUBU_W_VL);
15923
15924if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15925return V;
15926
15927returncombineVWADDSUBWSelect(N, DCI.DAG);
15928}
15929
15930// Helper function for performMemPairCombine.
15931// Try to combine the memory loads/stores LSNode1 and LSNode2
15932// into a single memory pair operation.
15933staticSDValuetryMemPairCombine(SelectionDAG &DAG,LSBaseSDNode *LSNode1,
15934LSBaseSDNode *LSNode2,SDValue BasePtr,
15935uint64_t Imm) {
15936SmallPtrSet<const SDNode *, 32> Visited;
15937SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15938
15939if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15940SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15941returnSDValue();
15942
15943MachineFunction &MF = DAG.getMachineFunction();
15944constRISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15945
15946// The new operation has twice the width.
15947MVT XLenVT = Subtarget.getXLenVT();
15948EVT MemVT = LSNode1->getMemoryVT();
15949EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15950MachineMemOperand *MMO = LSNode1->getMemOperand();
15951MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15952 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15953
15954if (LSNode1->getOpcode() ==ISD::LOAD) {
15955auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15956unsigned Opcode;
15957if (MemVT == MVT::i32)
15958 Opcode = (Ext ==ISD::ZEXTLOAD) ?RISCVISD::TH_LWUD :RISCVISD::TH_LWD;
15959else
15960 Opcode =RISCVISD::TH_LDD;
15961
15962SDValue Res = DAG.getMemIntrinsicNode(
15963 Opcode,SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15964 {LSNode1->getChain(), BasePtr,
15965 DAG.getConstant(Imm,SDLoc(LSNode1), XLenVT)},
15966 NewMemVT, NewMMO);
15967
15968SDValue Node1 =
15969 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)},SDLoc(LSNode1));
15970SDValue Node2 =
15971 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)},SDLoc(LSNode2));
15972
15973 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15974return Node1;
15975 }else {
15976unsigned Opcode = (MemVT == MVT::i32) ?RISCVISD::TH_SWD :RISCVISD::TH_SDD;
15977
15978SDValue Res = DAG.getMemIntrinsicNode(
15979 Opcode,SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15980 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15981 BasePtr, DAG.getConstant(Imm,SDLoc(LSNode1), XLenVT)},
15982 NewMemVT, NewMMO);
15983
15984 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15985return Res;
15986 }
15987}
15988
15989// Try to combine two adjacent loads/stores to a single pair instruction from
15990// the XTHeadMemPair vendor extension.
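// Illustrative example (hypothetical addresses, not from the source): two
// simple loads (load i32 [p]) and (load i32 [p + 4]) share the base p, their
// offsets differ by exactly 4, and the first offset satisfies
// isShiftedUInt<2, 3>, so tryMemPairCombine can replace them with a single
// TH_LWD node producing both values and one chain.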
15991staticSDValueperformMemPairCombine(SDNode *N,
15992TargetLowering::DAGCombinerInfo &DCI) {
15993SelectionDAG &DAG = DCI.DAG;
15994MachineFunction &MF = DAG.getMachineFunction();
15995constRISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15996
15997// Target does not support load/store pair.
15998if (!Subtarget.hasVendorXTHeadMemPair())
15999returnSDValue();
16000
16001LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
16002EVT MemVT = LSNode1->getMemoryVT();
16003unsigned OpNum = LSNode1->getOpcode() ==ISD::LOAD ? 1 : 2;
16004
16005// No volatile, indexed or atomic loads/stores.
16006if (!LSNode1->isSimple() || LSNode1->isIndexed())
16007returnSDValue();
16008
16009// Function to get a base + constant representation from a memory value.
16010auto ExtractBaseAndOffset = [](SDValuePtr) -> std::pair<SDValue, uint64_t> {
16011if (Ptr->getOpcode() ==ISD::ADD)
16012if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
16013return {Ptr->getOperand(0), C1->getZExtValue()};
16014return {Ptr, 0};
16015 };
16016
16017auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
16018
16019SDValue Chain =N->getOperand(0);
16020for (SDUse &Use : Chain->uses()) {
16021if (Use.getUser() !=N &&Use.getResNo() == 0 &&
16022Use.getUser()->getOpcode() ==N->getOpcode()) {
16023LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16024
16025// No volatile, indexed or atomic loads/stores.
16026if (!LSNode2->isSimple() || LSNode2->isIndexed())
16027continue;
16028
16029// Check if LSNode1 and LSNode2 have the same type and extension.
16030if (LSNode1->getOpcode() ==ISD::LOAD)
16031if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16032 cast<LoadSDNode>(LSNode1)->getExtensionType())
16033continue;
16034
16035if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16036continue;
16037
16038auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16039
16040// Check if the base pointer is the same for both instructions.
16041if (Base1 != Base2)
16042continue;
16043
16044// Check if the offsets match the XTHeadMemPair encoding constraints.
16045bool Valid =false;
16046if (MemVT == MVT::i32) {
16047// Check for adjacent i32 values and a 2-bit index.
16048if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16049 Valid =true;
16050 }elseif (MemVT == MVT::i64) {
16051// Check for adjacent i64 values and a 2-bit index.
16052if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16053 Valid =true;
16054 }
16055
16056if (!Valid)
16057continue;
16058
16059// Try to combine.
16060if (SDValue Res =
16061tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16062return Res;
16063 }
16064 }
16065
16066returnSDValue();
16067}
16068
16069// Fold
16070// (fp_to_int (froundeven X)) -> fcvt X, rne
16071// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16072// (fp_to_int (ffloor X)) -> fcvt X, rdn
16073// (fp_to_int (fceil X)) -> fcvt X, rup
16074// (fp_to_int (fround X)) -> fcvt X, rmm
16075// (fp_to_int (frint X)) -> fcvt X
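// Illustrative example (hypothetical types, not from the upstream comment):
// on RV64 with D, (fp_to_sint f64:(ffloor X)) producing i64 matches the
// ffloor row above and becomes (FCVT_X X, rdn), i.e. a single fcvt.l.d with
// the static round-down mode instead of lowering the ffloor separately and
// converting afterwards.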
16076staticSDValueperformFP_TO_INTCombine(SDNode *N,
16077TargetLowering::DAGCombinerInfo &DCI,
16078constRISCVSubtarget &Subtarget) {
16079SelectionDAG &DAG = DCI.DAG;
16080constTargetLowering &TLI = DAG.getTargetLoweringInfo();
16081MVT XLenVT = Subtarget.getXLenVT();
16082
16083SDValue Src =N->getOperand(0);
16084
16085// Don't do this for strict-fp Src.
16086if (Src->isStrictFPOpcode())
16087returnSDValue();
16088
16089// Ensure the FP type is legal.
16090if (!TLI.isTypeLegal(Src.getValueType()))
16091returnSDValue();
16092
16093// Don't do this for f16 with Zfhmin and not Zfh.
16094if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16095returnSDValue();
16096
16097RISCVFPRndMode::RoundingMode FRM =matchRoundingOp(Src.getOpcode());
16098// If the result is invalid, we didn't find a foldable instruction.
16099if (FRM ==RISCVFPRndMode::Invalid)
16100returnSDValue();
16101
16102SDLocDL(N);
16103bool IsSigned =N->getOpcode() ==ISD::FP_TO_SINT;
16104EVT VT =N->getValueType(0);
16105
16106if (VT.isVector() && TLI.isTypeLegal(VT)) {
16107MVT SrcVT = Src.getSimpleValueType();
16108MVT SrcContainerVT = SrcVT;
16109MVT ContainerVT = VT.getSimpleVT();
16110SDValue XVal = Src.getOperand(0);
16111
16112// For widening and narrowing conversions we just combine it into a
16113// VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16114// end up getting lowered to their appropriate pseudo instructions based on
16115// their operand types
16116if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16117 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16118returnSDValue();
16119
16120// Make fixed-length vectors scalable first
16121if (SrcVT.isFixedLengthVector()) {
16122 SrcContainerVT =getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16123 XVal =convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16124 ContainerVT =
16125getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16126 }
16127
16128auto [Mask, VL] =
16129getDefaultVLOps(SrcVT, SrcContainerVT,DL, DAG, Subtarget);
16130
16131SDValue FpToInt;
16132if (FRM ==RISCVFPRndMode::RTZ) {
16133// Use the dedicated trunc static rounding mode if we're truncating so we
16134// don't need to generate calls to fsrmi/fsrm
16135unsigned Opc =
16136 IsSigned ?RISCVISD::VFCVT_RTZ_X_F_VL :RISCVISD::VFCVT_RTZ_XU_F_VL;
16137 FpToInt = DAG.getNode(Opc,DL, ContainerVT, XVal, Mask, VL);
16138 }else {
16139unsigned Opc =
16140 IsSigned ?RISCVISD::VFCVT_RM_X_F_VL :RISCVISD::VFCVT_RM_XU_F_VL;
16141 FpToInt = DAG.getNode(Opc,DL, ContainerVT, XVal, Mask,
16142 DAG.getTargetConstant(FRM,DL, XLenVT), VL);
16143 }
16144
16145// If converted from fixed-length to scalable, convert back
16146if (VT.isFixedLengthVector())
16147 FpToInt =convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16148
16149return FpToInt;
16150 }
16151
16152// Only handle XLen or i32 types. Other types narrower than XLen will
16153// eventually be legalized to XLenVT.
16154if (VT != MVT::i32 && VT != XLenVT)
16155returnSDValue();
16156
16157unsigned Opc;
16158if (VT == XLenVT)
16159 Opc = IsSigned ?RISCVISD::FCVT_X :RISCVISD::FCVT_XU;
16160else
16161 Opc = IsSigned ?RISCVISD::FCVT_W_RV64 :RISCVISD::FCVT_WU_RV64;
16162
16163SDValue FpToInt = DAG.getNode(Opc,DL, XLenVT, Src.getOperand(0),
16164 DAG.getTargetConstant(FRM,DL, XLenVT));
16165return DAG.getNode(ISD::TRUNCATE,DL, VT, FpToInt);
16166}
16167
16168// Fold
16169// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16170// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16171// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16172// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16173// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16174// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16175staticSDValueperformFP_TO_INT_SATCombine(SDNode *N,
16176TargetLowering::DAGCombinerInfo &DCI,
16177constRISCVSubtarget &Subtarget) {
16178SelectionDAG &DAG = DCI.DAG;
16179constTargetLowering &TLI = DAG.getTargetLoweringInfo();
16180MVT XLenVT = Subtarget.getXLenVT();
16181
16182// Only handle XLen types. Other types narrower than XLen will eventually be
16183// legalized to XLenVT.
16184EVT DstVT =N->getValueType(0);
16185if (DstVT != XLenVT)
16186returnSDValue();
16187
16188SDValue Src =N->getOperand(0);
16189
16190// Don't do this for strict-fp Src.
16191if (Src->isStrictFPOpcode())
16192returnSDValue();
16193
16194// Ensure the FP type is also legal.
16195if (!TLI.isTypeLegal(Src.getValueType()))
16196returnSDValue();
16197
16198// Don't do this for f16 with Zfhmin and not Zfh.
16199if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16200returnSDValue();
16201
16202EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16203
16204RISCVFPRndMode::RoundingMode FRM =matchRoundingOp(Src.getOpcode());
16205if (FRM ==RISCVFPRndMode::Invalid)
16206returnSDValue();
16207
16208bool IsSigned =N->getOpcode() ==ISD::FP_TO_SINT_SAT;
16209
16210unsigned Opc;
16211if (SatVT == DstVT)
16212 Opc = IsSigned ?RISCVISD::FCVT_X :RISCVISD::FCVT_XU;
16213elseif (DstVT == MVT::i64 && SatVT == MVT::i32)
16214 Opc = IsSigned ?RISCVISD::FCVT_W_RV64 :RISCVISD::FCVT_WU_RV64;
16215else
16216returnSDValue();
16217// FIXME: Support other SatVTs by clamping before or after the conversion.
16218
16219 Src = Src.getOperand(0);
16220
16221SDLocDL(N);
16222SDValue FpToInt = DAG.getNode(Opc,DL, XLenVT, Src,
16223 DAG.getTargetConstant(FRM,DL, XLenVT));
16224
16225// fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16226// extend.
16227if (Opc ==RISCVISD::FCVT_WU_RV64)
16228 FpToInt = DAG.getZeroExtendInReg(FpToInt,DL, MVT::i32);
16229
16230// RISC-V FP-to-int conversions saturate to the destination register size, but
16231// don't produce 0 for nan.
16232SDValue ZeroInt = DAG.getConstant(0,DL, DstVT);
16233return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,ISD::CondCode::SETUO);
16234}
16235
16236// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16237// smaller than XLenVT.
16238staticSDValueperformBITREVERSECombine(SDNode *N,SelectionDAG &DAG,
16239constRISCVSubtarget &Subtarget) {
16240assert(Subtarget.hasStdExtZbkb() &&"Unexpected extension");
16241
16242SDValue Src =N->getOperand(0);
16243if (Src.getOpcode() !=ISD::BSWAP)
16244returnSDValue();
16245
16246EVT VT =N->getValueType(0);
16247if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16248 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16249returnSDValue();
16250
16251SDLocDL(N);
16252return DAG.getNode(RISCVISD::BREV8,DL, VT, Src.getOperand(0));
16253}
16254
16255staticSDValueperformVP_REVERSECombine(SDNode *N,SelectionDAG &DAG,
16256constRISCVSubtarget &Subtarget) {
16257// Fold:
16258// vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
16259
16260// Check if its first operand is a vp.load.
16261auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16262if (!VPLoad)
16263returnSDValue();
16264
16265EVT LoadVT = VPLoad->getValueType(0);
16266// We do not have a strided_load version for masks, and the evl of vp.reverse
16267// and vp.load should always be the same.
16268if (!LoadVT.getVectorElementType().isByteSized() ||
16269N->getOperand(2) != VPLoad->getVectorLength() ||
16270 !N->getOperand(0).hasOneUse())
16271returnSDValue();
16272
16273// Check if the mask of the outer vp.reverse is all ones.
16274if (!isOneOrOneSplat(N->getOperand(1)))
16275returnSDValue();
16276
16277SDValue LoadMask = VPLoad->getMask();
16278// If Mask is all ones, then the load is unmasked and can be reversed.
16279if (!isOneOrOneSplat(LoadMask)) {
16280// If the mask is not all ones, we can reverse the load if the mask was also
16281// reversed by an unmasked vp.reverse with the same EVL.
16282if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16283 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16284 LoadMask.getOperand(2) != VPLoad->getVectorLength())
16285returnSDValue();
16286 LoadMask = LoadMask.getOperand(0);
16287 }
16288
16289// Base = LoadAddr + (NumElem - 1) * ElemWidthByte
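// Worked example with hypothetical values (illustration only): for a vp.load
// of i32 elements at address A with EVL = 4, Base = A + (4 - 1) * 4 = A + 12
// and Stride = -4, so the strided load reads A+12, A+8, A+4, A+0, which is
// exactly the reversed element order the vp.reverse would have produced.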
16290SDLocDL(N);
16291MVT XLenVT = Subtarget.getXLenVT();
16292SDValue NumElem = VPLoad->getVectorLength();
16293uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16294
16295SDValue Temp1 = DAG.getNode(ISD::SUB,DL, XLenVT, NumElem,
16296 DAG.getConstant(1,DL, XLenVT));
16297SDValue Temp2 = DAG.getNode(ISD::MUL,DL, XLenVT, Temp1,
16298 DAG.getConstant(ElemWidthByte,DL, XLenVT));
16299SDValueBase = DAG.getNode(ISD::ADD,DL, XLenVT, VPLoad->getBasePtr(), Temp2);
16300SDValue Stride = DAG.getConstant(-ElemWidthByte,DL, XLenVT);
16301
16302MachineFunction &MF = DAG.getMachineFunction();
16303MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16304MachineMemOperand *MMO = MF.getMachineMemOperand(
16305 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16306LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16307
16308SDValue Ret = DAG.getStridedLoadVP(
16309 LoadVT,DL, VPLoad->getChain(),Base, Stride, LoadMask,
16310 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16311
16312 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16313
16314return Ret;
16315}
16316
16317staticSDValueperformVP_STORECombine(SDNode *N,SelectionDAG &DAG,
16318constRISCVSubtarget &Subtarget) {
16319// Fold:
16320// vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16321// -1, MASK)
16322auto *VPStore = cast<VPStoreSDNode>(N);
16323
16324if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16325returnSDValue();
16326
16327SDValue VPReverse = VPStore->getValue();
16328EVT ReverseVT = VPReverse->getValueType(0);
16329
16330// We do not have a strided_store version for masks, and the evl of vp.reverse
16331// and vp.store should always be the same.
16332if (!ReverseVT.getVectorElementType().isByteSized() ||
16333 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16334 !VPReverse.hasOneUse())
16335returnSDValue();
16336
16337SDValue StoreMask = VPStore->getMask();
16338// If Mask is all ones, then the store is unmasked and can be reversed.
16339if (!isOneOrOneSplat(StoreMask)) {
16340// If the mask is not all ones, we can reverse the store if the mask was
16341// also reversed by an unmasked vp.reverse with the same EVL.
16342if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16343 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16344 StoreMask.getOperand(2) != VPStore->getVectorLength())
16345returnSDValue();
16346 StoreMask = StoreMask.getOperand(0);
16347 }
16348
16349// Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16350SDLocDL(N);
16351MVT XLenVT = Subtarget.getXLenVT();
16352SDValue NumElem = VPStore->getVectorLength();
16353uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16354
16355SDValue Temp1 = DAG.getNode(ISD::SUB,DL, XLenVT, NumElem,
16356 DAG.getConstant(1,DL, XLenVT));
16357SDValue Temp2 = DAG.getNode(ISD::MUL,DL, XLenVT, Temp1,
16358 DAG.getConstant(ElemWidthByte,DL, XLenVT));
16359SDValueBase =
16360 DAG.getNode(ISD::ADD,DL, XLenVT, VPStore->getBasePtr(), Temp2);
16361SDValue Stride = DAG.getConstant(-ElemWidthByte,DL, XLenVT);
16362
16363MachineFunction &MF = DAG.getMachineFunction();
16364MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16365MachineMemOperand *MMO = MF.getMachineMemOperand(
16366 PtrInfo, VPStore->getMemOperand()->getFlags(),
16367LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16368
16369return DAG.getStridedStoreVP(
16370 VPStore->getChain(),DL, VPReverse.getOperand(0),Base,
16371 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16372 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16373 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16374}
16375
16376// Convert from one FMA opcode to another based on whether we are negating the
16377// multiply result and/or the accumulator.
16378// NOTE: Only supports RVV operations with VL.
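// Illustrative example (not from the upstream comment): VFMADD_VL computes
// (a * b) + c. Calling this helper with NegMul = true and NegAcc = true first
// toggles it to VFNMSUB_VL, i.e. -(a * b) + c, and then to VFNMADD_VL,
// i.e. -(a * b) - c, matching a negation of both the product and the
// accumulator.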
16379staticunsignednegateFMAOpcode(unsigned Opcode,bool NegMul,bool NegAcc) {
16380// Negating the multiply result changes ADD<->SUB and toggles 'N'.
16381if (NegMul) {
16382// clang-format off
16383switch (Opcode) {
16384default:llvm_unreachable("Unexpected opcode");
16385caseRISCVISD::VFMADD_VL: Opcode =RISCVISD::VFNMSUB_VL;break;
16386caseRISCVISD::VFNMSUB_VL: Opcode =RISCVISD::VFMADD_VL;break;
16387caseRISCVISD::VFNMADD_VL: Opcode =RISCVISD::VFMSUB_VL;break;
16388caseRISCVISD::VFMSUB_VL: Opcode =RISCVISD::VFNMADD_VL;break;
16389caseRISCVISD::STRICT_VFMADD_VL: Opcode =RISCVISD::STRICT_VFNMSUB_VL;break;
16390caseRISCVISD::STRICT_VFNMSUB_VL: Opcode =RISCVISD::STRICT_VFMADD_VL;break;
16391caseRISCVISD::STRICT_VFNMADD_VL: Opcode =RISCVISD::STRICT_VFMSUB_VL;break;
16392caseRISCVISD::STRICT_VFMSUB_VL: Opcode =RISCVISD::STRICT_VFNMADD_VL;break;
16393 }
16394// clang-format on
16395 }
16396
16397// Negating the accumulator changes ADD<->SUB.
16398if (NegAcc) {
16399// clang-format off
16400switch (Opcode) {
16401default:llvm_unreachable("Unexpected opcode");
16402caseRISCVISD::VFMADD_VL: Opcode =RISCVISD::VFMSUB_VL;break;
16403caseRISCVISD::VFMSUB_VL: Opcode =RISCVISD::VFMADD_VL;break;
16404caseRISCVISD::VFNMADD_VL: Opcode =RISCVISD::VFNMSUB_VL;break;
16405caseRISCVISD::VFNMSUB_VL: Opcode =RISCVISD::VFNMADD_VL;break;
16406caseRISCVISD::STRICT_VFMADD_VL: Opcode =RISCVISD::STRICT_VFMSUB_VL;break;
16407caseRISCVISD::STRICT_VFMSUB_VL: Opcode =RISCVISD::STRICT_VFMADD_VL;break;
16408caseRISCVISD::STRICT_VFNMADD_VL: Opcode =RISCVISD::STRICT_VFNMSUB_VL;break;
16409caseRISCVISD::STRICT_VFNMSUB_VL: Opcode =RISCVISD::STRICT_VFNMADD_VL;break;
16410 }
16411// clang-format on
16412 }
16413
16414return Opcode;
16415}
16416
16417staticSDValuecombineVFMADD_VLWithVFNEG_VL(SDNode *N,SelectionDAG &DAG) {
16418// Fold FNEG_VL into FMA opcodes.
16419// The first operand of strict-fp is chain.
16420bool IsStrict =
16421 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16422unsignedOffset = IsStrict ? 1 : 0;
16423SDValueA =N->getOperand(0 +Offset);
16424SDValueB =N->getOperand(1 +Offset);
16425SDValueC =N->getOperand(2 +Offset);
16426SDValue Mask =N->getOperand(3 +Offset);
16427SDValue VL =N->getOperand(4 +Offset);
16428
16429auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16430if (V.getOpcode() ==RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16431 V.getOperand(2) == VL) {
16432// Return the negated input.
16433 V = V.getOperand(0);
16434returntrue;
16435 }
16436
16437returnfalse;
16438 };
16439
16440bool NegA = invertIfNegative(A);
16441bool NegB = invertIfNegative(B);
16442bool NegC = invertIfNegative(C);
16443
16444// If no operands are negated, we're done.
16445if (!NegA && !NegB && !NegC)
16446returnSDValue();
16447
16448unsigned NewOpcode =negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16449if (IsStrict)
16450return DAG.getNode(NewOpcode,SDLoc(N),N->getVTList(),
16451 {N->getOperand(0), A, B, C, Mask, VL});
16452return DAG.getNode(NewOpcode,SDLoc(N),N->getValueType(0),A,B,C, Mask,
16453 VL);
16454}
16455
16456staticSDValueperformVFMADD_VLCombine(SDNode *N,
16457TargetLowering::DAGCombinerInfo &DCI,
16458constRISCVSubtarget &Subtarget) {
16459SelectionDAG &DAG = DCI.DAG;
16460
16461if (SDValue V =combineVFMADD_VLWithVFNEG_VL(N, DAG))
16462return V;
16463
16464// FIXME: Ignore strict opcodes for now.
16465if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16466returnSDValue();
16467
16468returncombineOp_VLToVWOp_VL(N, DCI, Subtarget);
16469}
16470
16471staticSDValueperformSRACombine(SDNode *N,SelectionDAG &DAG,
16472constRISCVSubtarget &Subtarget) {
16473assert(N->getOpcode() ==ISD::SRA &&"Unexpected opcode");
16474
16475EVT VT =N->getValueType(0);
16476
16477if (VT != Subtarget.getXLenVT())
16478returnSDValue();
16479
16480if (!isa<ConstantSDNode>(N->getOperand(1)))
16481returnSDValue();
16482uint64_t ShAmt =N->getConstantOperandVal(1);
16483
16484SDValue N0 =N->getOperand(0);
16485
16486// Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16487// (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16488if (N0.getOpcode() ==ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16489unsigned ExtSize =
16490 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16491if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() ==ISD::SHL &&
16492 N0.getOperand(0).hasOneUse() &&
16493 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16494uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16495if (LShAmt < ExtSize) {
16496unsignedSize = VT.getSizeInBits();
16497SDLoc ShlDL(N0.getOperand(0));
16498SDValue Shl =
16499 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16500 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16501SDLocDL(N);
16502return DAG.getNode(ISD::SRA,DL, VT, Shl,
16503 DAG.getConstant(ShAmt + (Size - ExtSize),DL, VT));
16504 }
16505 }
16506 }
16507
16508if (ShAmt > 32 || VT != MVT::i64)
16509returnSDValue();
16510
16511// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16512// FIXME: Should this be a generic combine? There's a similar combine on X86.
16513//
16514// Also try these folds where an add or sub is in the middle.
16515// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16516// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
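// Worked example with hypothetical constants (illustration only): for
// (sra (shl X, 32), 27) we have ShAmt = 27 and C = 32 - 27 = 5, so the code
// below produces (shl (sext_inreg X, i32), 5): the sext_inreg reproduces the
// shift up by 32 followed by the arithmetic shift back down, and the
// remaining shift amount of 5 is applied as a plain SHL.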
16517SDValue Shl;
16518ConstantSDNode *AddC =nullptr;
16519
16520// We might have an ADD or SUB between the SRA and SHL.
16521bool IsAdd = N0.getOpcode() ==ISD::ADD;
16522if ((IsAdd || N0.getOpcode() ==ISD::SUB)) {
16523// Other operand needs to be a constant we can modify.
16524 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16525if (!AddC)
16526returnSDValue();
16527
16528// AddC needs to have at least 32 trailing zeros.
16529if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16530returnSDValue();
16531
16532// All users should be a shift by constant less than or equal to 32. This
16533// ensures we'll do this optimization for each of them to produce an
16534// add/sub+sext_inreg they can all share.
16535for (SDNode *U : N0->users()) {
16536if (U->getOpcode() !=ISD::SRA ||
16537 !isa<ConstantSDNode>(U->getOperand(1)) ||
16538 U->getConstantOperandVal(1) > 32)
16539returnSDValue();
16540 }
16541
16542 Shl = N0.getOperand(IsAdd ? 0 : 1);
16543 }else {
16544// Not an ADD or SUB.
16545 Shl = N0;
16546 }
16547
16548// Look for a shift left by 32.
16549if (Shl.getOpcode() !=ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16550 Shl.getConstantOperandVal(1) != 32)
16551returnSDValue();
16552
16553// If we didn't look through an add/sub, then the shl should have one use.
16554// If we did look through an add/sub, the sext_inreg we create is free so
16555// we're only creating 2 new instructions. It's enough to only remove the
16556// original sra+add/sub.
16557if (!AddC && !Shl.hasOneUse())
16558returnSDValue();
16559
16560SDLocDL(N);
16561SDValue In = Shl.getOperand(0);
16562
16563// If we looked through an ADD or SUB, we need to rebuild it with the shifted
16564// constant.
16565if (AddC) {
16566SDValue ShiftedAddC =
16567 DAG.getConstant(AddC->getZExtValue() >> 32,DL, MVT::i64);
16568if (IsAdd)
16569 In = DAG.getNode(ISD::ADD,DL, MVT::i64, In, ShiftedAddC);
16570else
16571 In = DAG.getNode(ISD::SUB,DL, MVT::i64, ShiftedAddC, In);
16572 }
16573
16574SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG,DL, MVT::i64, In,
16575 DAG.getValueType(MVT::i32));
16576if (ShAmt == 32)
16577return SExt;
16578
16579return DAG.getNode(
16580ISD::SHL,DL, MVT::i64, SExt,
16581 DAG.getConstant(32 - ShAmt,DL, MVT::i64));
16582}
16583
16584// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
16585// the result is used as the condition of a br_cc or select_cc we can invert,
16586// inverting the setcc is free, and Z is 0/1. Caller will invert the
16587// br_cc/select_cc.
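// Illustrative example (hypothetical operands, not from the upstream
// comment): for (and (setcc eq X, Y), (xor Z, 1)) with Z known to be 0/1,
// this helper returns (or (setcc ne X, Y), Z), i.e. the inverse of the
// original condition by De Morgan's laws; the caller then inverts the
// br_cc/select_cc condition code so the overall behaviour is unchanged and
// the xor disappears.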
16588staticSDValuetryDemorganOfBooleanCondition(SDValueCond,SelectionDAG &DAG) {
16589bool IsAnd =Cond.getOpcode() ==ISD::AND;
16590if (!IsAnd &&Cond.getOpcode() !=ISD::OR)
16591returnSDValue();
16592
16593if (!Cond.hasOneUse())
16594returnSDValue();
16595
16596SDValue Setcc =Cond.getOperand(0);
16597SDValueXor =Cond.getOperand(1);
16598// Canonicalize setcc to LHS.
16599if (Setcc.getOpcode() !=ISD::SETCC)
16600std::swap(Setcc,Xor);
16601// LHS should be a setcc and RHS should be an xor.
16602if (Setcc.getOpcode() !=ISD::SETCC || !Setcc.hasOneUse() ||
16603Xor.getOpcode() !=ISD::XOR || !Xor.hasOneUse())
16604returnSDValue();
16605
16606// If the condition is an And, SimplifyDemandedBits may have changed
16607// (xor Z, 1) to (not Z).
16608SDValue Xor1 =Xor.getOperand(1);
16609if (!isOneConstant(Xor1) && !(IsAnd &&isAllOnesConstant(Xor1)))
16610returnSDValue();
16611
16612EVT VT =Cond.getValueType();
16613SDValue Xor0 =Xor.getOperand(0);
16614
16615// The LHS of the xor needs to be 0/1.
16616APInt Mask =APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16617if (!DAG.MaskedValueIsZero(Xor0, Mask))
16618returnSDValue();
16619
16620// We can only invert integer setccs.
16621EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16622if (!SetCCOpVT.isScalarInteger())
16623returnSDValue();
16624
16625ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16626if (ISD::isIntEqualitySetCC(CCVal)) {
16627 CCVal =ISD::getSetCCInverse(CCVal, SetCCOpVT);
16628 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16629 Setcc.getOperand(1), CCVal);
16630 }elseif (CCVal ==ISD::SETLT &&isNullConstant(Setcc.getOperand(0))) {
16631// Invert (setlt 0, X) by converting to (setlt X, 1).
16632 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16633 DAG.getConstant(1,SDLoc(Setcc), VT), CCVal);
16634 }elseif (CCVal ==ISD::SETLT &&isOneConstant(Setcc.getOperand(1))) {
16635// Invert (setlt X, 1) by converting to (setlt 0, X).
16636 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16637 DAG.getConstant(0,SDLoc(Setcc), VT),
16638 Setcc.getOperand(0), CCVal);
16639 }else
16640returnSDValue();
16641
16642unsigned Opc = IsAnd ?ISD::OR :ISD::AND;
16643return DAG.getNode(Opc,SDLoc(Cond), VT, Setcc,Xor.getOperand(0));
16644}
16645
16646// Perform common combines for BR_CC and SELECT_CC conditions.
16647staticboolcombine_CC(SDValue &LHS,SDValue &RHS,SDValue &CC,constSDLoc &DL,
16648SelectionDAG &DAG,constRISCVSubtarget &Subtarget) {
16649ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16650
16651// Since an arithmetic right shift always preserves the sign bit,
16652// the shift can be omitted.
16653// Fold setlt (sra X, N), 0 -> setlt X, 0 and
16654// setge (sra X, N), 0 -> setge X, 0
16655if (isNullConstant(RHS) && (CCVal ==ISD::SETGE || CCVal ==ISD::SETLT) &&
16656LHS.getOpcode() ==ISD::SRA) {
16657LHS =LHS.getOperand(0);
16658returntrue;
16659 }
16660
16661if (!ISD::isIntEqualitySetCC(CCVal))
16662returnfalse;
16663
16664// Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16665// Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16666if (LHS.getOpcode() ==ISD::SETCC &&isNullConstant(RHS) &&
16667LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16668// If we're looking for eq 0 instead of ne 0, we need to invert the
16669// condition.
16670bool Invert = CCVal ==ISD::SETEQ;
16671 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16672if (Invert)
16673 CCVal =ISD::getSetCCInverse(CCVal,LHS.getValueType());
16674
16675RHS =LHS.getOperand(1);
16676LHS =LHS.getOperand(0);
16677translateSetCCForBranch(DL,LHS,RHS, CCVal, DAG);
16678
16679CC = DAG.getCondCode(CCVal);
16680returntrue;
16681 }
16682
16683// Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16684if (LHS.getOpcode() ==ISD::XOR &&isNullConstant(RHS)) {
16685RHS =LHS.getOperand(1);
16686LHS =LHS.getOperand(0);
16687returntrue;
16688 }
16689
16690// Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16691if (isNullConstant(RHS) &&LHS.getOpcode() ==ISD::SRL &&LHS.hasOneUse() &&
16692LHS.getOperand(1).getOpcode() ==ISD::Constant) {
16693SDValue LHS0 =LHS.getOperand(0);
16694if (LHS0.getOpcode() ==ISD::AND &&
16695 LHS0.getOperand(1).getOpcode() ==ISD::Constant) {
16696uint64_t Mask = LHS0.getConstantOperandVal(1);
16697uint64_t ShAmt =LHS.getConstantOperandVal(1);
16698if (isPowerOf2_64(Mask) &&Log2_64(Mask) == ShAmt) {
16699 CCVal = CCVal ==ISD::SETEQ ?ISD::SETGE :ISD::SETLT;
16700CC = DAG.getCondCode(CCVal);
16701
16702 ShAmt =LHS.getValueSizeInBits() - 1 - ShAmt;
16703LHS = LHS0.getOperand(0);
16704if (ShAmt != 0)
16705LHS =
16706 DAG.getNode(ISD::SHL,DL,LHS.getValueType(), LHS0.getOperand(0),
16707 DAG.getConstant(ShAmt,DL,LHS.getValueType()));
16708returntrue;
16709 }
16710 }
16711 }
16712
16713// (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16714// This can occur when legalizing some floating point comparisons.
16715APInt Mask =APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16716if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16717 CCVal =ISD::getSetCCInverse(CCVal,LHS.getValueType());
16718CC = DAG.getCondCode(CCVal);
16719RHS = DAG.getConstant(0,DL,LHS.getValueType());
16720returntrue;
16721 }
16722
16723if (isNullConstant(RHS)) {
16724if (SDValue NewCond =tryDemorganOfBooleanCondition(LHS, DAG)) {
16725 CCVal =ISD::getSetCCInverse(CCVal,LHS.getValueType());
16726CC = DAG.getCondCode(CCVal);
16727LHS = NewCond;
16728returntrue;
16729 }
16730 }
16731
16732returnfalse;
16733}
16734
16735// Fold
16736// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16737// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16738// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16739// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
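// Illustrative example (hypothetical operands, not from the upstream
// comment): (select C, (add Y, X), Y) becomes (add Y, (select C, X, 0))
// because 0 is the neutral element of add; a select between a value and zero
// is generally cheaper to materialize than a select between two unrelated
// full-width values.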
16740staticSDValuetryFoldSelectIntoOp(SDNode *N,SelectionDAG &DAG,
16741SDValue TrueVal,SDValue FalseVal,
16742bool Swapped) {
16743bool Commutative =true;
16744unsigned Opc = TrueVal.getOpcode();
16745switch (Opc) {
16746default:
16747returnSDValue();
16748caseISD::SHL:
16749caseISD::SRA:
16750caseISD::SRL:
16751caseISD::SUB:
16752 Commutative =false;
16753break;
16754caseISD::ADD:
16755caseISD::OR:
16756caseISD::XOR:
16757break;
16758 }
16759
16760if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16761returnSDValue();
16762
16763unsigned OpToFold;
16764if (FalseVal == TrueVal.getOperand(0))
16765 OpToFold = 0;
16766elseif (Commutative && FalseVal == TrueVal.getOperand(1))
16767 OpToFold = 1;
16768else
16769returnSDValue();
16770
16771EVT VT =N->getValueType(0);
16772SDLocDL(N);
16773SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16774EVT OtherOpVT = OtherOp.getValueType();
16775SDValue IdentityOperand =
16776 DAG.getNeutralElement(Opc,DL, OtherOpVT,N->getFlags());
16777if (!Commutative)
16778 IdentityOperand = DAG.getConstant(0,DL, OtherOpVT);
16779assert(IdentityOperand &&"No identity operand!");
16780
16781if (Swapped)
16782std::swap(OtherOp, IdentityOperand);
16783SDValue NewSel =
16784 DAG.getSelect(DL, OtherOpVT,N->getOperand(0), OtherOp, IdentityOperand);
16785return DAG.getNode(TrueVal.getOpcode(),DL, VT, FalseVal, NewSel);
16786}
16787
16788// This tries to get rid of the `select` and `icmp` that are being used to
16789// handle targets that do not support `cttz(0)`/`ctlz(0)`.
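// Illustrative example (hypothetical IR, not from the upstream comment): the
// pattern (select (setcc X, 0, eq), 0, (cttz_zero_undef X)) is rewritten to
// (and (cttz X), BitWidth - 1). Since cttz(0) is defined as BitWidth, the
// mask folds the X == 0 case to 0 as well, so the compare and select become
// unnecessary.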
16790staticSDValuefoldSelectOfCTTZOrCTLZ(SDNode *N,SelectionDAG &DAG) {
16791SDValueCond =N->getOperand(0);
16792
16793// This represents either CTTZ or CTLZ instruction.
16794SDValue CountZeroes;
16795
16796SDValue ValOnZero;
16797
16798if (Cond.getOpcode() !=ISD::SETCC)
16799returnSDValue();
16800
16801if (!isNullConstant(Cond->getOperand(1)))
16802returnSDValue();
16803
16804ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16805if (CCVal ==ISD::CondCode::SETEQ) {
16806 CountZeroes =N->getOperand(2);
16807 ValOnZero =N->getOperand(1);
16808 }elseif (CCVal ==ISD::CondCode::SETNE) {
16809 CountZeroes =N->getOperand(1);
16810 ValOnZero =N->getOperand(2);
16811 }else {
16812returnSDValue();
16813 }
16814
16815if (CountZeroes.getOpcode() ==ISD::TRUNCATE ||
16816 CountZeroes.getOpcode() ==ISD::ZERO_EXTEND)
16817 CountZeroes = CountZeroes.getOperand(0);
16818
16819if (CountZeroes.getOpcode() !=ISD::CTTZ &&
16820 CountZeroes.getOpcode() !=ISD::CTTZ_ZERO_UNDEF &&
16821 CountZeroes.getOpcode() !=ISD::CTLZ &&
16822 CountZeroes.getOpcode() !=ISD::CTLZ_ZERO_UNDEF)
16823returnSDValue();
16824
16825if (!isNullConstant(ValOnZero))
16826returnSDValue();
16827
16828SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16829if (Cond->getOperand(0) != CountZeroesArgument)
16830returnSDValue();
16831
16832if (CountZeroes.getOpcode() ==ISD::CTTZ_ZERO_UNDEF) {
16833 CountZeroes = DAG.getNode(ISD::CTTZ,SDLoc(CountZeroes),
16834 CountZeroes.getValueType(), CountZeroesArgument);
16835 }elseif (CountZeroes.getOpcode() ==ISD::CTLZ_ZERO_UNDEF) {
16836 CountZeroes = DAG.getNode(ISD::CTLZ,SDLoc(CountZeroes),
16837 CountZeroes.getValueType(), CountZeroesArgument);
16838 }
16839
16840unsignedBitWidth = CountZeroes.getValueSizeInBits();
16841SDValue BitWidthMinusOne =
16842 DAG.getConstant(BitWidth - 1,SDLoc(N), CountZeroes.getValueType());
16843
16844auto AndNode = DAG.getNode(ISD::AND,SDLoc(N), CountZeroes.getValueType(),
16845 CountZeroes, BitWidthMinusOne);
16846return DAG.getZExtOrTrunc(AndNode,SDLoc(N),N->getValueType(0));
16847}
16848
16849staticSDValueuseInversedSetcc(SDNode *N,SelectionDAG &DAG,
16850constRISCVSubtarget &Subtarget) {
16851SDValueCond =N->getOperand(0);
16852SDValue True =N->getOperand(1);
16853SDValue False =N->getOperand(2);
16854SDLocDL(N);
16855EVT VT =N->getValueType(0);
16856EVT CondVT =Cond.getValueType();
16857
16858if (Cond.getOpcode() !=ISD::SETCC || !Cond.hasOneUse())
16859returnSDValue();
16860
16861// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
16862// BEXTI, where C is power of 2.
16863if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16864 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16865SDValueLHS =Cond.getOperand(0);
16866SDValueRHS =Cond.getOperand(1);
16867ISD::CondCodeCC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16868if (CC ==ISD::SETEQ &&LHS.getOpcode() ==ISD::AND &&
16869 isa<ConstantSDNode>(LHS.getOperand(1)) &&isNullConstant(RHS)) {
16870constAPInt &MaskVal =LHS.getConstantOperandAPInt(1);
16871if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16872return DAG.getSelect(DL, VT,
16873 DAG.getSetCC(DL, CondVT,LHS,RHS,ISD::SETNE),
16874 False, True);
16875 }
16876 }
16877returnSDValue();
16878}
16879
16880staticboolmatchSelectAddSub(SDValue TrueVal,SDValue FalseVal,bool &SwapCC) {
16881if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
16882returnfalse;
16883
16884 SwapCC =false;
16885if (TrueVal.getOpcode() ==ISD::SUB && FalseVal.getOpcode() ==ISD::ADD) {
16886std::swap(TrueVal, FalseVal);
16887 SwapCC =true;
16888 }
16889
16890if (TrueVal.getOpcode() !=ISD::ADD || FalseVal.getOpcode() !=ISD::SUB)
16891returnfalse;
16892
16893SDValueA = FalseVal.getOperand(0);
16894SDValueB = FalseVal.getOperand(1);
16895// Add is commutative, so check both orders
16896return ((TrueVal.getOperand(0) ==A && TrueVal.getOperand(1) ==B) ||
16897 (TrueVal.getOperand(1) ==A && TrueVal.getOperand(0) ==B));
16898}
16899
16900/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
16901/// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
16902/// register pressure compared to the add followed by a masked vsub sequence.
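// Illustrative example (hypothetical operands, not from the upstream
// comment): for (vselect CC, (add a, b), (sub a, b)) the combine builds
// NegB = (sub 0, b), inverts CC, and emits (add a, (vselect !CC, -b, b));
// the inner vselect maps onto a masked vrsub.vi and the outer add onto an
// unmasked vadd.vv.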
16903staticSDValueperformVSELECTCombine(SDNode *N,SelectionDAG &DAG) {
16904SDLocDL(N);
16905EVT VT =N->getValueType(0);
16906SDValueCC =N->getOperand(0);
16907SDValue TrueVal =N->getOperand(1);
16908SDValue FalseVal =N->getOperand(2);
16909
16910bool SwapCC;
16911if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
16912returnSDValue();
16913
16914SDValue Sub = SwapCC ? TrueVal : FalseVal;
16915SDValueA = Sub.getOperand(0);
16916SDValueB = Sub.getOperand(1);
16917
16918// Arrange the select such that we can match a masked
16919// vrsub.vi to perform the conditional negate
16920SDValue NegB = DAG.getNegative(B,DL, VT);
16921if (!SwapCC)
16922CC = DAG.getLogicalNOT(DL,CC,CC->getValueType(0));
16923SDValue NewB = DAG.getNode(ISD::VSELECT,DL, VT,CC, NegB,B);
16924return DAG.getNode(ISD::ADD,DL, VT,A, NewB);
16925}
16926
16927staticSDValueperformSELECTCombine(SDNode *N,SelectionDAG &DAG,
16928constRISCVSubtarget &Subtarget) {
16929if (SDValue Folded =foldSelectOfCTTZOrCTLZ(N, DAG))
16930return Folded;
16931
16932if (SDValue V =useInversedSetcc(N, DAG, Subtarget))
16933return V;
16934
16935if (Subtarget.hasConditionalMoveFusion())
16936returnSDValue();
16937
16938SDValue TrueVal =N->getOperand(1);
16939SDValue FalseVal =N->getOperand(2);
16940if (SDValue V =tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal,/*Swapped*/false))
16941return V;
16942returntryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal,/*Swapped*/true);
16943}
16944
16945/// If we have a build_vector where each lane is binop X, C, where C
16946/// is a constant (but not necessarily the same constant on all lanes),
16947/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16948/// We assume that materializing a constant build vector will be no more
16949/// expensive than performing O(n) binops.
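// Illustrative example (hypothetical lanes, not from the upstream comment):
// build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4) is
// rewritten as (add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)),
// turning the per-lane scalar adds into one vector add plus a constant
// vector that is usually cheap to materialize.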
16950staticSDValueperformBUILD_VECTORCombine(SDNode *N,SelectionDAG &DAG,
16951constRISCVSubtarget &Subtarget,
16952constRISCVTargetLowering &TLI) {
16953SDLocDL(N);
16954EVT VT =N->getValueType(0);
16955
16956assert(!VT.isScalableVector() &&"unexpected build vector");
16957
16958if (VT.getVectorNumElements() == 1)
16959returnSDValue();
16960
16961constunsigned Opcode =N->op_begin()->getNode()->getOpcode();
16962if (!TLI.isBinOp(Opcode))
16963returnSDValue();
16964
16965if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16966returnSDValue();
16967
16968// This BUILD_VECTOR involves an implicit truncation, and sinking
16969// truncates through binops is non-trivial.
16970if (N->op_begin()->getValueType() != VT.getVectorElementType())
16971returnSDValue();
16972
16973SmallVector<SDValue> LHSOps;
16974SmallVector<SDValue> RHSOps;
16975for (SDValueOp :N->ops()) {
16976if (Op.isUndef()) {
16977// We can't form a divide or remainder from undef.
16978if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16979returnSDValue();
16980
16981 LHSOps.push_back(Op);
16982 RHSOps.push_back(Op);
16983continue;
16984 }
16985
16986// TODO: We can handle operations which have a neutral rhs value
16987// (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16988// of profit in a more explicit manner.
16989if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16990returnSDValue();
16991
16992 LHSOps.push_back(Op.getOperand(0));
16993if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16994 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16995returnSDValue();
16996// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16997// have different LHS and RHS types.
16998if (Op.getOperand(0).getValueType() !=Op.getOperand(1).getValueType())
16999returnSDValue();
17000
17001 RHSOps.push_back(Op.getOperand(1));
17002 }
17003
17004return DAG.getNode(Opcode,DL, VT, DAG.getBuildVector(VT,DL, LHSOps),
17005 DAG.getBuildVector(VT,DL, RHSOps));
17006}
17007
17008staticSDValueperformINSERT_VECTOR_ELTCombine(SDNode *N,SelectionDAG &DAG,
17009constRISCVSubtarget &Subtarget,
17010constRISCVTargetLowering &TLI) {
17011SDValue InVec =N->getOperand(0);
17012SDValue InVal =N->getOperand(1);
17013SDValue EltNo =N->getOperand(2);
17014SDLocDL(N);
17015
17016EVT VT = InVec.getValueType();
17017if (VT.isScalableVector())
17018returnSDValue();
17019
17020if (!InVec.hasOneUse())
17021returnSDValue();
17022
17023// Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
17024// move the insert_vector_elts into the arms of the binop. Note that
17025// the new RHS must be a constant.
17026constunsigned InVecOpcode = InVec->getOpcode();
17027if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
17028 InVal.hasOneUse()) {
17029SDValue InVecLHS = InVec->getOperand(0);
17030SDValue InVecRHS = InVec->getOperand(1);
17031SDValue InValLHS = InVal->getOperand(0);
17032SDValue InValRHS = InVal->getOperand(1);
17033
17034if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
17035returnSDValue();
17036if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
17037returnSDValue();
17038// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
17039// have different LHS and RHS types.
17040if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
17041returnSDValue();
17042SDValueLHS = DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, VT,
17043 InVecLHS, InValLHS, EltNo);
17044SDValueRHS = DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, VT,
17045 InVecRHS, InValRHS, EltNo);
17046return DAG.getNode(InVecOpcode,DL, VT,LHS,RHS);
17047 }
17048
17049// Given insert_vector_elt (concat_vectors ...), InVal, Elt
17050// move the insert_vector_elt to the source operand of the concat_vector.
17051if (InVec.getOpcode() !=ISD::CONCAT_VECTORS)
17052returnSDValue();
17053
17054auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17055if (!IndexC)
17056returnSDValue();
17057unsigned Elt = IndexC->getZExtValue();
17058
17059EVT ConcatVT = InVec.getOperand(0).getValueType();
17060if (ConcatVT.getVectorElementType() != InVal.getValueType())
17061returnSDValue();
17062unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17063SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts,DL);
17064
17065unsigned ConcatOpIdx = Elt / ConcatNumElts;
17066SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
17067 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT,DL, ConcatVT,
17068 ConcatOp, InVal, NewIdx);
17069
17070SmallVector<SDValue> ConcatOps;
17071 ConcatOps.append(InVec->op_begin(), InVec->op_end());
17072 ConcatOps[ConcatOpIdx] = ConcatOp;
17073return DAG.getNode(ISD::CONCAT_VECTORS,DL, VT, ConcatOps);
17074}
17075
17076// If we're concatenating a series of vector loads like
17077// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
17078// then we can turn this into a strided load by widening the vector elements:
17079// vlse32 p, stride=n
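// Illustrative example (hypothetical layout, not from the upstream comment):
// for concat_vectors (load v4i8, p), (load v4i8, p+16), (load v4i8, p+32),
// (load v4i8, p+48), each chunk covers 4 bytes, so the element type is
// widened to i32 and the four loads become one 4-element strided load from p
// with stride 16, bitcast back to the original result type.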
17080staticSDValueperformCONCAT_VECTORSCombine(SDNode *N,SelectionDAG &DAG,
17081constRISCVSubtarget &Subtarget,
17082constRISCVTargetLowering &TLI) {
17083SDLocDL(N);
17084EVT VT =N->getValueType(0);
17085
17086// Only perform this combine on legal MVTs.
17087if (!TLI.isTypeLegal(VT))
17088returnSDValue();
17089
17090// TODO: Potentially extend this to scalable vectors
17091if (VT.isScalableVector())
17092returnSDValue();
17093
17094auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
17095if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
17096 !SDValue(BaseLd, 0).hasOneUse())
17097returnSDValue();
17098
17099EVT BaseLdVT = BaseLd->getValueType(0);
17100
17101// Go through the loads and check that they're strided
17102SmallVector<LoadSDNode *> Lds;
17103 Lds.push_back(BaseLd);
17104AlignAlign = BaseLd->getAlign();
17105for (SDValueOp :N->ops().drop_front()) {
17106auto *Ld = dyn_cast<LoadSDNode>(Op);
17107if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
17108 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
17109 Ld->getValueType(0) != BaseLdVT)
17110returnSDValue();
17111
17112 Lds.push_back(Ld);
17113
17114// The common alignment is the most restrictive (smallest) of all the loads
17115Align = std::min(Align, Ld->getAlign());
17116 }
17117
17118usingPtrDiff = std::pair<std::variant<int64_t, SDValue>,bool>;
17119auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
17120LoadSDNode *Ld2) -> std::optional<PtrDiff> {
17121// If the load ptrs can be decomposed into a common (Base + Index) with a
17122// common constant stride, then return the constant stride.
17123BaseIndexOffset BIO1 =BaseIndexOffset::match(Ld1, DAG);
17124BaseIndexOffset BIO2 =BaseIndexOffset::match(Ld2, DAG);
17125if (BIO1.equalBaseIndex(BIO2, DAG))
17126return {{BIO2.getOffset() - BIO1.getOffset(),false}};
17127
17128// Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17129SDValue P1 = Ld1->getBasePtr();
17130SDValue P2 = Ld2->getBasePtr();
17131if (P2.getOpcode() ==ISD::ADD && P2.getOperand(0) == P1)
17132return {{P2.getOperand(1),false}};
17133if (P1.getOpcode() ==ISD::ADD && P1.getOperand(0) == P2)
17134return {{P1.getOperand(1),true}};
17135
17136return std::nullopt;
17137 };
17138
17139// Get the distance between the first and second loads
17140auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17141if (!BaseDiff)
17142returnSDValue();
17143
17144// Check all the loads are the same distance apart
17145for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17146if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17147returnSDValue();
17148
17149// TODO: At this point, we've successfully matched a generalized gather
17150// load. Maybe we should emit that, and then move the specialized
17151// matchers above and below into a DAG combine?
17152
17153// Get the widened scalar type, e.g. v4i8 -> i32
17154unsigned WideScalarBitWidth =
17155 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
17156MVT WideScalarVT =MVT::getIntegerVT(WideScalarBitWidth);
17157
17158// Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
17159MVT WideVecVT =MVT::getVectorVT(WideScalarVT,N->getNumOperands());
17160if (!TLI.isTypeLegal(WideVecVT))
17161returnSDValue();
17162
17163// Check that the operation is legal
17164if (!TLI.isLegalStridedLoadStore(WideVecVT,Align))
17165returnSDValue();
17166
17167auto [StrideVariant, MustNegateStride] = *BaseDiff;
17168SDValue Stride =
17169 std::holds_alternative<SDValue>(StrideVariant)
17170 ? std::get<SDValue>(StrideVariant)
17171 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant),DL,
17172 Lds[0]->getOffset().getValueType());
17173if (MustNegateStride)
17174 Stride = DAG.getNegative(Stride,DL, Stride.getValueType());
17175
17176SDValue AllOneMask =
17177 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1),DL,
17178 DAG.getConstant(1,DL, MVT::i1));
17179
17180uint64_t MemSize;
17181if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17182 ConstStride && ConstStride->getSExtValue() >= 0)
17183// total size = (elsize * n) + (stride - elsize) * (n-1)
17184// = elsize + stride * (n-1)
17185 MemSize = WideScalarVT.getSizeInBits() +
17186 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17187else
17188// If Stride isn't constant, then we can't know how much it will load
17189 MemSize =MemoryLocation::UnknownSize;
17190
17191MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17192 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17193Align);
17194
17195SDValue StridedLoad = DAG.getStridedLoadVP(
17196 WideVecVT,DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17197 AllOneMask,
17198 DAG.getConstant(N->getNumOperands(),DL, Subtarget.getXLenVT()), MMO);
17199
17200for (SDValue Ld :N->ops())
17201 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17202
17203return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17204}
17205
17206staticSDValueperformVECTOR_SHUFFLECombine(SDNode *N,SelectionDAG &DAG,
17207constRISCVSubtarget &Subtarget,
17208constRISCVTargetLowering &TLI) {
17209SDLocDL(N);
17210EVT VT =N->getValueType(0);
17211constunsigned ElementSize = VT.getScalarSizeInBits();
17212constunsigned NumElts = VT.getVectorNumElements();
17213SDValue V1 =N->getOperand(0);
17214SDValue V2 =N->getOperand(1);
17215ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
17216MVT XLenVT = Subtarget.getXLenVT();
17217
17218// Recognize a disguised select of add/sub.
17219bool SwapCC;
17220if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
17221matchSelectAddSub(V1, V2, SwapCC)) {
17222SDValue Sub = SwapCC ? V1 : V2;
17223SDValueA = Sub.getOperand(0);
17224SDValueB = Sub.getOperand(1);
17225
17226SmallVector<SDValue> MaskVals;
17227for (int MaskIndex : Mask) {
17228bool SelectMaskVal = (MaskIndex < (int)NumElts);
17229 MaskVals.push_back(DAG.getConstant(SelectMaskVal,DL, XLenVT));
17230 }
17231assert(MaskVals.size() == NumElts &&"Unexpected select-like shuffle");
17232EVT MaskVT =EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
17233SDValueCC = DAG.getBuildVector(MaskVT,DL, MaskVals);
17234
17235// Arrange the select such that we can match a masked
17236// vrsub.vi to perform the conditional negate
17237SDValue NegB = DAG.getNegative(B,DL, VT);
17238if (!SwapCC)
17239CC = DAG.getLogicalNOT(DL,CC,CC->getValueType(0));
17240SDValue NewB = DAG.getNode(ISD::VSELECT,DL, VT,CC, NegB,B);
17241return DAG.getNode(ISD::ADD,DL, VT,A, NewB);
17242 }
17243
17244// Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
17245// during the combine phase before type legalization, and relies on
17246// DAGCombine not undoing the transform if isShuffleMaskLegal returns false
17247// for the source mask.
17248if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
17249 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
17250 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
17251returnSDValue();
17252
17253SmallVector<int, 8> NewMask;
17254narrowShuffleMaskElts(2, Mask, NewMask);
17255
17256LLVMContext &C = *DAG.getContext();
17257EVT NewEltVT =EVT::getIntegerVT(C, ElementSize / 2);
17258EVT NewVT =EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
17259SDValue Res = DAG.getVectorShuffle(NewVT,DL, DAG.getBitcast(NewVT, V1),
17260 DAG.getBitcast(NewVT, V2), NewMask);
17261return DAG.getBitcast(VT, Res);
17262}
17263
17264staticSDValuecombineToVWMACC(SDNode *N,SelectionDAG &DAG,
17265constRISCVSubtarget &Subtarget) {
17266
17267assert(N->getOpcode() ==RISCVISD::ADD_VL ||N->getOpcode() ==ISD::ADD);
17268
17269if (N->getValueType(0).isFixedLengthVector())
17270returnSDValue();
17271
17272SDValue Addend =N->getOperand(0);
17273SDValue MulOp =N->getOperand(1);
17274
17275if (N->getOpcode() ==RISCVISD::ADD_VL) {
17276SDValue AddPassthruOp =N->getOperand(2);
17277if (!AddPassthruOp.isUndef())
17278returnSDValue();
17279 }
17280
17281auto IsVWMulOpc = [](unsigned Opc) {
17282switch (Opc) {
17283caseRISCVISD::VWMUL_VL:
17284caseRISCVISD::VWMULU_VL:
17285caseRISCVISD::VWMULSU_VL:
17286returntrue;
17287default:
17288returnfalse;
17289 }
17290 };
17291
17292if (!IsVWMulOpc(MulOp.getOpcode()))
17293std::swap(Addend, MulOp);
17294
17295if (!IsVWMulOpc(MulOp.getOpcode()))
17296returnSDValue();
17297
17298SDValue MulPassthruOp = MulOp.getOperand(2);
17299
17300if (!MulPassthruOp.isUndef())
17301returnSDValue();
17302
17303auto [AddMask, AddVL] = [](SDNode *N,SelectionDAG &DAG,
17304constRISCVSubtarget &Subtarget) {
17305if (N->getOpcode() ==ISD::ADD) {
17306SDLocDL(N);
17307returngetDefaultScalableVLOps(N->getSimpleValueType(0),DL, DAG,
17308 Subtarget);
17309 }
17310return std::make_pair(N->getOperand(3),N->getOperand(4));
17311 }(N, DAG, Subtarget);
17312
17313SDValue MulMask = MulOp.getOperand(3);
17314SDValue MulVL = MulOp.getOperand(4);
17315
17316if (AddMask != MulMask || AddVL != MulVL)
17317returnSDValue();
17318
17319unsigned Opc =RISCVISD::VWMACC_VL + MulOp.getOpcode() -RISCVISD::VWMUL_VL;
17320static_assert(RISCVISD::VWMACC_VL + 1 ==RISCVISD::VWMACCU_VL,
17321"Unexpected opcode after VWMACC_VL");
17322static_assert(RISCVISD::VWMACC_VL + 2 ==RISCVISD::VWMACCSU_VL,
17323"Unexpected opcode after VWMACC_VL!");
17324static_assert(RISCVISD::VWMUL_VL + 1 ==RISCVISD::VWMULU_VL,
17325"Unexpected opcode after VWMUL_VL!");
17326static_assert(RISCVISD::VWMUL_VL + 2 ==RISCVISD::VWMULSU_VL,
17327"Unexpected opcode after VWMUL_VL!");
17328
17329SDLocDL(N);
17330EVT VT =N->getValueType(0);
17331SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17332 AddVL};
17333return DAG.getNode(Opc,DL, VT, Ops);
17334}
17335
17336staticboollegalizeScatterGatherIndexType(SDLocDL,SDValue &Index,
17337ISD::MemIndexType &IndexType,
17338RISCVTargetLowering::DAGCombinerInfo &DCI) {
17339if (!DCI.isBeforeLegalize())
17340returnfalse;
17341
17342SelectionDAG &DAG = DCI.DAG;
17343constMVT XLenVT =
17344 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17345
17346constEVT IndexVT = Index.getValueType();
17347
17348// RISC-V indexed loads only support the "unsigned unscaled" addressing
17349// mode, so anything else must be manually legalized.
17350if (!isIndexTypeSigned(IndexType))
17351returnfalse;
17352
17353if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17354// Any index legalization should first promote to XLenVT, so we don't lose
17355// bits when scaling. This may create an illegal index type so we let
17356// LLVM's legalization take care of the splitting.
17357// FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17358 Index = DAG.getNode(ISD::SIGN_EXTEND,DL,
17359 IndexVT.changeVectorElementType(XLenVT), Index);
17360 }
17361 IndexType =ISD::UNSIGNED_SCALED;
17362returntrue;
17363}
17364
17365/// Match the index vector of a scatter or gather node as the shuffle mask
17366/// which performs the rearrangement if possible. Will only match if
17367/// all lanes are touched, and thus replacing the scatter or gather with
17368/// a unit strided access and shuffle is legal.
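// Illustrative example (hypothetical constants, not from the upstream
// comment): a gather of v4i32 with an all-ones mask and byte-offset index
// vector <4, 0, 12, 8> touches every element of one contiguous 16-byte
// block, so it can be rewritten as a unit-strided access plus the shuffle
// mask <1, 0, 3, 2>.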
17369staticboolmatchIndexAsShuffle(EVT VT,SDValue Index,SDValue Mask,
17370SmallVector<int> &ShuffleMask) {
17371if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17372returnfalse;
17373if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17374returnfalse;
17375
17376constunsigned ElementSize = VT.getScalarStoreSize();
17377constunsigned NumElems = VT.getVectorNumElements();
17378
17379// Create the shuffle mask and check all bits active
17380assert(ShuffleMask.empty());
17381BitVector ActiveLanes(NumElems);
17382for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17383// TODO: We've found an active bit of UB, and could be
17384// more aggressive here if desired.
17385if (Index->getOperand(i)->isUndef())
17386returnfalse;
17387uint64_tC = Index->getConstantOperandVal(i);
17388if (C % ElementSize != 0)
17389returnfalse;
17390C =C / ElementSize;
17391if (C >= NumElems)
17392returnfalse;
17393 ShuffleMask.push_back(C);
17394 ActiveLanes.set(C);
17395 }
17396return ActiveLanes.all();
17397}
17398
17399/// Match the index of a gather or scatter operation as an operation
17400/// with twice the element width and half the number of elements. This is
17401/// generally profitable (if legal) because these operations are linear
17402/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17403/// come out ahead.
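// Illustrative example (hypothetical constants, not from the upstream
// comment): for a v8i16 access, byte offsets <8, 10, 0, 2, 24, 26, 16, 18>
// pair up so that each even-indexed offset is a multiple of 4 and the
// following offset is exactly 2 bytes later, so the same operation can be
// expressed on v4i32 with half as many index elements, subject to the ELEN
// and alignment checks performed below.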
17404staticboolmatchIndexAsWiderOp(EVT VT,SDValue Index,SDValue Mask,
17405Align BaseAlign,constRISCVSubtarget &ST) {
17406if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17407returnfalse;
17408if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17409returnfalse;
17410
17411// Attempt a doubling. If we can use an element type 4x or 8x in
17412// size, this will happen via multiple iterations of the transform.
17413constunsigned NumElems = VT.getVectorNumElements();
17414if (NumElems % 2 != 0)
17415returnfalse;
17416
17417constunsigned ElementSize = VT.getScalarStoreSize();
17418constunsigned WiderElementSize = ElementSize * 2;
17419if (WiderElementSize > ST.getELen()/8)
17420returnfalse;
17421
17422if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17423returnfalse;
17424
17425for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17426// TODO: We've found an active bit of UB, and could be
17427// more aggressive here if desired.
17428if (Index->getOperand(i)->isUndef())
17429returnfalse;
17430// TODO: This offset check is too strict if we support fully
17431// misaligned memory operations.
17432uint64_tC = Index->getConstantOperandVal(i);
17433if (i % 2 == 0) {
17434if (C % WiderElementSize != 0)
17435returnfalse;
17436continue;
17437 }
17438uint64_tLast = Index->getConstantOperandVal(i-1);
17439if (C !=Last + ElementSize)
17440returnfalse;
17441 }
17442returntrue;
17443}
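// Illustrative example (not part of the original source): a gather of v8i16
// with byte offsets {0, 2, 8, 10, 16, 18, 24, 26} reads adjacent element
// pairs, so it can be rewritten as a v4i32 gather with offsets {0, 8, 16, 24}
// followed by a bitcast back to v8i16, halving the number of indexed lanes
// (subject to the ELEN and alignment checks above).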
17444
17445// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17446// This is beneficial for cases where X and Y are both the same low-precision
17447// vector value type. Since the truncate is lowered into n levels of
17448// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
17449// such a pattern would otherwise be expanded into a series of "vsetvli"
17450// and "vnsrl" instructions by the time it reaches this point.
17451static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17452SDValue Mask =N->getOperand(1);
17453SDValue VL =N->getOperand(2);
17454
17455bool IsVLMAX =isAllOnesConstant(VL) ||
17456 (isa<RegisterSDNode>(VL) &&
17457 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17458if (!IsVLMAX || Mask.getOpcode() !=RISCVISD::VMSET_VL ||
17459 Mask.getOperand(0) != VL)
17460returnSDValue();
17461
17462auto IsTruncNode = [&](SDValue V) {
17463return V.getOpcode() ==RISCVISD::TRUNCATE_VECTOR_VL &&
17464 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17465 };
17466
17467SDValueOp =N->getOperand(0);
17468
17469// We first need to find the innermost TRUNCATE_VECTOR_VL node
17470// to recognize this pattern.
17471while (IsTruncNode(Op)) {
17472if (!Op.hasOneUse())
17473returnSDValue();
17474Op =Op.getOperand(0);
17475 }
17476
17477if (Op.getOpcode() !=ISD::SRA || !Op.hasOneUse())
17478returnSDValue();
17479
17480SDValue N0 =Op.getOperand(0);
17481SDValue N1 =Op.getOperand(1);
17482if (N0.getOpcode() !=ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17483 N1.getOpcode() !=ISD::ZERO_EXTEND || !N1.hasOneUse())
17484returnSDValue();
17485
17486SDValue N00 = N0.getOperand(0);
17487SDValue N10 = N1.getOperand(0);
17488if (!N00.getValueType().isVector() ||
17489 N00.getValueType() != N10.getValueType() ||
17490N->getValueType(0) != N10.getValueType())
17491returnSDValue();
17492
17493unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17494SDValueSMin =
17495 DAG.getNode(ISD::SMIN,SDLoc(N1),N->getValueType(0), N10,
17496 DAG.getConstant(MaxShAmt,SDLoc(N1),N->getValueType(0)));
17497return DAG.getNode(ISD::SRA,SDLoc(N),N->getValueType(0), N00,SMin);
17498}
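// Illustrative example (not part of the original source): for v4i8 values X
// and Y, trunc(sra(sext(X) to v4i32, zext(Y) to v4i32)) back to v4i8 becomes
// sra(X, smin(Y, 7)) computed directly on v4i8, avoiding the chain of
// TRUNCATE_VECTOR_VL (vnsrl) operations.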
17499
17500// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17501// maximum value for the truncated type.
17502// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17503// is the signed maximum value for the truncated type and C2 is the signed
17504// minimum value.
17505static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17506                                    const RISCVSubtarget &Subtarget) {
17507assert(N->getOpcode() ==RISCVISD::TRUNCATE_VECTOR_VL);
17508
17509MVT VT =N->getSimpleValueType(0);
17510
17511SDValue Mask =N->getOperand(1);
17512SDValue VL =N->getOperand(2);
17513
17514auto MatchMinMax = [&VL, &Mask](SDValue V,unsigned Opc,unsigned OpcVL,
17515APInt &SplatVal) {
17516if (V.getOpcode() != Opc &&
17517 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17518 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17519returnSDValue();
17520
17521SDValueOp = V.getOperand(1);
17522
17523// Peek through conversion between fixed and scalable vectors.
17524if (Op.getOpcode() ==ISD::INSERT_SUBVECTOR &&Op.getOperand(0).isUndef() &&
17525isNullConstant(Op.getOperand(2)) &&
17526Op.getOperand(1).getValueType().isFixedLengthVector() &&
17527Op.getOperand(1).getOpcode() ==ISD::EXTRACT_SUBVECTOR &&
17528Op.getOperand(1).getOperand(0).getValueType() ==Op.getValueType() &&
17529isNullConstant(Op.getOperand(1).getOperand(1)))
17530Op =Op.getOperand(1).getOperand(0);
17531
17532if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17533return V.getOperand(0);
17534
17535if (Op.getOpcode() ==RISCVISD::VMV_V_X_VL &&Op.getOperand(0).isUndef() &&
17536Op.getOperand(2) == VL) {
17537if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17538 SplatVal =
17539 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17540return V.getOperand(0);
17541 }
17542 }
17543
17544returnSDValue();
17545 };
17546
17547SDLocDL(N);
17548
17549auto DetectUSatPattern = [&](SDValue V) {
17550APInt LoC, HiC;
17551
17552// Simple case, V is a UMIN.
17553if (SDValue UMinOp = MatchMinMax(V,ISD::UMIN,RISCVISD::UMIN_VL, HiC))
17554if (HiC.isMask(VT.getScalarSizeInBits()))
17555return UMinOp;
17556
17557// If we have an SMAX that removes negative numbers first, then we can match
17558// SMIN instead of UMIN.
17559if (SDValue SMinOp = MatchMinMax(V,ISD::SMIN,RISCVISD::SMIN_VL, HiC))
17560if (SDValue SMaxOp =
17561 MatchMinMax(SMinOp,ISD::SMAX,RISCVISD::SMAX_VL, LoC))
17562if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17563return SMinOp;
17564
17565// If we have an SMIN before an SMAX and the SMAX constant is less than or
17566// equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17567// first.
17568if (SDValue SMaxOp = MatchMinMax(V,ISD::SMAX,RISCVISD::SMAX_VL, LoC))
17569if (SDValue SMinOp =
17570 MatchMinMax(SMaxOp,ISD::SMIN,RISCVISD::SMIN_VL, HiC))
17571if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17572 HiC.uge(LoC))
17573return DAG.getNode(RISCVISD::SMAX_VL,DL, V.getValueType(), SMinOp,
17574 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17575 Mask, VL);
17576
17577returnSDValue();
17578 };
17579
17580auto DetectSSatPattern = [&](SDValue V) {
17581unsigned NumDstBits = VT.getScalarSizeInBits();
17582unsigned NumSrcBits = V.getScalarValueSizeInBits();
17583APInt SignedMax =APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17584APInt SignedMin =APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17585
17586APInt HiC, LoC;
17587if (SDValue SMinOp = MatchMinMax(V,ISD::SMIN,RISCVISD::SMIN_VL, HiC))
17588if (SDValue SMaxOp =
17589 MatchMinMax(SMinOp,ISD::SMAX,RISCVISD::SMAX_VL, LoC))
17590if (HiC == SignedMax && LoC == SignedMin)
17591return SMaxOp;
17592
17593if (SDValue SMaxOp = MatchMinMax(V,ISD::SMAX,RISCVISD::SMAX_VL, LoC))
17594if (SDValue SMinOp =
17595 MatchMinMax(SMaxOp,ISD::SMIN,RISCVISD::SMIN_VL, HiC))
17596if (HiC == SignedMax && LoC == SignedMin)
17597return SMinOp;
17598
17599returnSDValue();
17600 };
17601
17602SDValue Src =N->getOperand(0);
17603
17604// Look through multiple layers of truncates.
17605while (Src.getOpcode() ==RISCVISD::TRUNCATE_VECTOR_VL &&
17606 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17607 Src.hasOneUse())
17608 Src = Src.getOperand(0);
17609
17610SDValue Val;
17611unsigned ClipOpc;
17612if ((Val = DetectUSatPattern(Src)))
17613 ClipOpc =RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17614elseif ((Val = DetectSSatPattern(Src)))
17615 ClipOpc =RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17616else
17617returnSDValue();
17618
17619MVT ValVT = Val.getSimpleValueType();
17620
17621do {
17622MVT ValEltVT =MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17623 ValVT = ValVT.changeVectorElementType(ValEltVT);
17624 Val = DAG.getNode(ClipOpc,DL, ValVT, Val, Mask, VL);
17625 }while (ValVT != VT);
17626
17627return Val;
17628}
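// Illustrative example (not part of the original source): when truncating an
// i16 element vector to i8, (umin X, 255) before the truncate clamps to the
// unsigned i8 range and becomes a single vnclipu; likewise
// (smin (smax X, -128), 127) becomes vnclip.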
17629
17630// Convert
17631// (iX ctpop (bitcast (vXi1 A)))
17632// ->
17633// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17634// FIXME: It's complicated to match all the variations of this after type
17635// legalization so we only handle the pre-type legalization pattern, but that
17636// requires the fixed vector type to be legal.
17637static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17638                                         const RISCVSubtarget &Subtarget) {
17639EVT VT =N->getValueType(0);
17640if (!VT.isScalarInteger())
17641returnSDValue();
17642
17643SDValue Src =N->getOperand(0);
17644
17645// Peek through zero_extend. It doesn't change the count.
17646if (Src.getOpcode() ==ISD::ZERO_EXTEND)
17647 Src = Src.getOperand(0);
17648
17649if (Src.getOpcode() !=ISD::BITCAST)
17650returnSDValue();
17651
17652 Src = Src.getOperand(0);
17653EVT SrcEVT = Src.getValueType();
17654if (!SrcEVT.isSimple())
17655returnSDValue();
17656
17657MVT SrcMVT = SrcEVT.getSimpleVT();
17658// Make sure the input is an i1 vector.
17659if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17660returnSDValue();
17661
17662if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17663returnSDValue();
17664
17665MVT ContainerVT =getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17666 Src =convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17667
17668SDLocDL(N);
17669auto [Mask, VL] =getDefaultVLOps(SrcMVT, ContainerVT,DL, DAG, Subtarget);
17670
17671MVT XLenVT = Subtarget.getXLenVT();
17672SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL,DL, XLenVT, Src, Mask, VL);
17673return DAG.getZExtOrTrunc(Pop,DL, VT);
17674}
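// Illustrative example (not part of the original source): (i32 ctpop
// (bitcast (v8i1 M))) is rewritten to insert the fixed-length mask into a
// scalable container, count the set mask bits with vcpop.m, and then
// zero-extend or truncate the scalar result to i32.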
17675
17676SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17677                                               DAGCombinerInfo &DCI) const {
17678SelectionDAG &DAG = DCI.DAG;
17679constMVT XLenVT = Subtarget.getXLenVT();
17680SDLocDL(N);
17681
17682// Helper to call SimplifyDemandedBits on an operand of N where only some low
17683// bits are demanded. N will be added to the Worklist if it was not deleted.
17684// Caller should return SDValue(N, 0) if this returns true.
17685auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo,unsigned LowBits) {
17686SDValueOp =N->getOperand(OpNo);
17687APInt Mask =APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17688if (!SimplifyDemandedBits(Op, Mask, DCI))
17689returnfalse;
17690
17691if (N->getOpcode() !=ISD::DELETED_NODE)
17692 DCI.AddToWorklist(N);
17693returntrue;
17694 };
17695
17696switch (N->getOpcode()) {
17697default:
17698break;
17699caseRISCVISD::SplitF64: {
17700SDValue Op0 =N->getOperand(0);
17701// If the input to SplitF64 is just BuildPairF64 then the operation is
17702// redundant. Instead, use BuildPairF64's operands directly.
17703if (Op0->getOpcode() ==RISCVISD::BuildPairF64)
17704return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17705
17706if (Op0->isUndef()) {
17707SDValueLo = DAG.getUNDEF(MVT::i32);
17708SDValueHi = DAG.getUNDEF(MVT::i32);
17709return DCI.CombineTo(N,Lo,Hi);
17710 }
17711
17712// It's cheaper to materialise two 32-bit integers than to load a double
17713// from the constant pool and transfer it to integer registers through the
17714// stack.
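// For example (illustrative, not part of the original source): the f64
// constant 1.0 has the bit pattern 0x3FF0000000000000, so it splits into
// Lo = 0x00000000 and Hi = 0x3FF00000, both cheap to materialise (Hi is a
// single lui).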
17715if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17716APInt V =C->getValueAPF().bitcastToAPInt();
17717SDValueLo = DAG.getConstant(V.trunc(32),DL, MVT::i32);
17718SDValueHi = DAG.getConstant(V.lshr(32).trunc(32),DL, MVT::i32);
17719return DCI.CombineTo(N,Lo,Hi);
17720 }
17721
17722// This is a target-specific version of a DAGCombine performed in
17723// DAGCombiner::visitBITCAST. It performs the equivalent of:
17724// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17725// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17726if (!(Op0.getOpcode() ==ISD::FNEG || Op0.getOpcode() ==ISD::FABS) ||
17727 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17728break;
17729SDValue NewSplitF64 =
17730 DAG.getNode(RISCVISD::SplitF64,DL, DAG.getVTList(MVT::i32, MVT::i32),
17731 Op0.getOperand(0));
17732SDValueLo = NewSplitF64.getValue(0);
17733SDValueHi = NewSplitF64.getValue(1);
17734APInt SignBit =APInt::getSignMask(32);
17735if (Op0.getOpcode() ==ISD::FNEG) {
17736SDValue NewHi = DAG.getNode(ISD::XOR,DL, MVT::i32,Hi,
17737 DAG.getConstant(SignBit,DL, MVT::i32));
17738return DCI.CombineTo(N,Lo, NewHi);
17739 }
17740assert(Op0.getOpcode() ==ISD::FABS);
17741SDValue NewHi = DAG.getNode(ISD::AND,DL, MVT::i32,Hi,
17742 DAG.getConstant(~SignBit,DL, MVT::i32));
17743return DCI.CombineTo(N,Lo, NewHi);
17744 }
17745caseRISCVISD::SLLW:
17746caseRISCVISD::SRAW:
17747caseRISCVISD::SRLW:
17748caseRISCVISD::RORW:
17749caseRISCVISD::ROLW: {
17750// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17751if (SimplifyDemandedLowBitsHelper(0, 32) ||
17752 SimplifyDemandedLowBitsHelper(1, 5))
17753returnSDValue(N, 0);
17754
17755break;
17756 }
17757caseRISCVISD::CLZW:
17758caseRISCVISD::CTZW: {
17759// Only the lower 32 bits of the first operand are read
17760if (SimplifyDemandedLowBitsHelper(0, 32))
17761returnSDValue(N, 0);
17762break;
17763 }
17764caseRISCVISD::FMV_W_X_RV64: {
17765// If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17766// conversion is unnecessary and can be replaced with the
17767// FMV_X_ANYEXTW_RV64 operand.
17768SDValue Op0 =N->getOperand(0);
17769if (Op0.getOpcode() ==RISCVISD::FMV_X_ANYEXTW_RV64)
17770return Op0.getOperand(0);
17771break;
17772 }
17773caseRISCVISD::FMV_X_ANYEXTH:
17774caseRISCVISD::FMV_X_ANYEXTW_RV64: {
17775SDLocDL(N);
17776SDValue Op0 =N->getOperand(0);
17777MVT VT =N->getSimpleValueType(0);
17778
17779// Constant fold.
17780if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17781APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17782return DAG.getConstant(Val,DL, VT);
17783 }
17784
17785// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17786// conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17787// operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17788if ((N->getOpcode() ==RISCVISD::FMV_X_ANYEXTW_RV64 &&
17789 Op0->getOpcode() ==RISCVISD::FMV_W_X_RV64) ||
17790 (N->getOpcode() ==RISCVISD::FMV_X_ANYEXTH &&
17791 Op0->getOpcode() ==RISCVISD::FMV_H_X)) {
17792assert(Op0.getOperand(0).getValueType() == VT &&
17793"Unexpected value type!");
17794return Op0.getOperand(0);
17795 }
17796
17797if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17798 cast<LoadSDNode>(Op0)->isSimple()) {
17799MVT IVT =MVT::getIntegerVT(Op0.getValueSizeInBits());
17800auto *LN0 = cast<LoadSDNode>(Op0);
17801SDValue Load =
17802 DAG.getExtLoad(ISD::EXTLOAD,SDLoc(N), VT, LN0->getChain(),
17803 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17804 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17805return Load;
17806 }
17807
17808// This is a target-specific version of a DAGCombine performed in
17809// DAGCombiner::visitBITCAST. It performs the equivalent of:
17810// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17811// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17812if (!(Op0.getOpcode() ==ISD::FNEG || Op0.getOpcode() ==ISD::FABS) ||
17813 !Op0.getNode()->hasOneUse())
17814break;
17815SDValue NewFMV = DAG.getNode(N->getOpcode(),DL, VT, Op0.getOperand(0));
17816unsigned FPBits =N->getOpcode() ==RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17817APInt SignBit =APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17818if (Op0.getOpcode() ==ISD::FNEG)
17819return DAG.getNode(ISD::XOR,DL, VT, NewFMV,
17820 DAG.getConstant(SignBit,DL, VT));
17821
17822assert(Op0.getOpcode() ==ISD::FABS);
17823return DAG.getNode(ISD::AND,DL, VT, NewFMV,
17824 DAG.getConstant(~SignBit,DL, VT));
17825 }
17826caseISD::ABS: {
17827EVT VT =N->getValueType(0);
17828SDValue N0 =N->getOperand(0);
17829// abs (sext) -> zext (abs)
17830// abs (zext) -> zext (handled elsewhere)
17831if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() ==ISD::SIGN_EXTEND) {
17832SDValue Src = N0.getOperand(0);
17833SDLocDL(N);
17834return DAG.getNode(ISD::ZERO_EXTEND,DL, VT,
17835 DAG.getNode(ISD::ABS,DL, Src.getValueType(), Src));
17836 }
17837break;
17838 }
17839caseISD::ADD: {
17840if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17841return V;
17842if (SDValue V =combineToVWMACC(N, DAG, Subtarget))
17843return V;
17844returnperformADDCombine(N, DCI, Subtarget);
17845 }
17846caseISD::SUB: {
17847if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17848return V;
17849returnperformSUBCombine(N, DAG, Subtarget);
17850 }
17851caseISD::AND:
17852returnperformANDCombine(N, DCI, Subtarget);
17853caseISD::OR: {
17854if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17855return V;
17856returnperformORCombine(N, DCI, Subtarget);
17857 }
17858caseISD::XOR:
17859returnperformXORCombine(N, DAG, Subtarget);
17860caseISD::MUL:
17861if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17862return V;
17863returnperformMULCombine(N, DAG, DCI, Subtarget);
17864caseISD::SDIV:
17865caseISD::UDIV:
17866caseISD::SREM:
17867caseISD::UREM:
17868if (SDValue V =combineBinOpOfZExt(N, DAG))
17869return V;
17870break;
17871caseISD::FMUL: {
17872// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17873SDValue N0 =N->getOperand(0);
17874SDValue N1 =N->getOperand(1);
17875if (N0->getOpcode() !=ISD::FCOPYSIGN)
17876std::swap(N0, N1);
17877if (N0->getOpcode() !=ISD::FCOPYSIGN)
17878returnSDValue();
17879ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17880if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17881returnSDValue();
17882EVT VT =N->getValueType(0);
17883if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17884returnSDValue();
17885SDValue Sign = N0->getOperand(1);
17886if (Sign.getValueType() != VT)
17887returnSDValue();
17888return DAG.getNode(RISCVISD::FSGNJX,SDLoc(N), VT, N1, N0->getOperand(1));
17889 }
17890caseISD::FADD:
17891caseISD::UMAX:
17892caseISD::UMIN:
17893caseISD::SMAX:
17894caseISD::SMIN:
17895caseISD::FMAXNUM:
17896caseISD::FMINNUM: {
17897if (SDValue V =combineBinOpToReduce(N, DAG, Subtarget))
17898return V;
17899if (SDValue V =combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17900return V;
17901returnSDValue();
17902 }
17903caseISD::SETCC:
17904returnperformSETCCCombine(N, DAG, Subtarget);
17905caseISD::SIGN_EXTEND_INREG:
17906returnperformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17907caseISD::ZERO_EXTEND:
17908// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17909// type legalization. This is safe because fp_to_uint produces poison if
17910// it overflows.
17911if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17912SDValue Src =N->getOperand(0);
17913if (Src.getOpcode() ==ISD::FP_TO_UINT &&
17914isTypeLegal(Src.getOperand(0).getValueType()))
17915return DAG.getNode(ISD::FP_TO_UINT,SDLoc(N), MVT::i64,
17916 Src.getOperand(0));
17917if (Src.getOpcode() ==ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17918isTypeLegal(Src.getOperand(1).getValueType())) {
17919SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17920SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT,SDLoc(N), VTs,
17921 Src.getOperand(0), Src.getOperand(1));
17922 DCI.CombineTo(N, Res);
17923 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17924 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17925returnSDValue(N, 0);// Return N so it doesn't get rechecked.
17926 }
17927 }
17928returnSDValue();
17929caseRISCVISD::TRUNCATE_VECTOR_VL:
17930if (SDValue V =combineTruncOfSraSext(N, DAG))
17931return V;
17932returncombineTruncToVnclip(N, DAG, Subtarget);
17933caseISD::TRUNCATE:
17934returnperformTRUNCATECombine(N, DAG, Subtarget);
17935caseISD::SELECT:
17936returnperformSELECTCombine(N, DAG, Subtarget);
17937caseISD::VSELECT:
17938returnperformVSELECTCombine(N, DAG);
17939caseRISCVISD::CZERO_EQZ:
17940caseRISCVISD::CZERO_NEZ: {
17941SDValue Val =N->getOperand(0);
17942SDValueCond =N->getOperand(1);
17943
17944unsigned Opc =N->getOpcode();
17945
17946// czero_eqz x, x -> x
17947if (Opc ==RISCVISD::CZERO_EQZ && Val ==Cond)
17948return Val;
17949
17950unsigned InvOpc =
17951 Opc ==RISCVISD::CZERO_EQZ ?RISCVISD::CZERO_NEZ :RISCVISD::CZERO_EQZ;
17952
17953// czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17954// czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17955if (Cond.getOpcode() ==ISD::XOR &&isOneConstant(Cond.getOperand(1))) {
17956SDValue NewCond =Cond.getOperand(0);
17957APInt Mask =APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17958if (DAG.MaskedValueIsZero(NewCond, Mask))
17959return DAG.getNode(InvOpc,SDLoc(N),N->getValueType(0), Val, NewCond);
17960 }
17961// czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17962// czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17963// czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17964// czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17965if (Cond.getOpcode() ==ISD::SETCC &&isNullConstant(Cond.getOperand(1))) {
17966ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17967if (ISD::isIntEqualitySetCC(CCVal))
17968return DAG.getNode(CCVal ==ISD::SETNE ? Opc : InvOpc,SDLoc(N),
17969N->getValueType(0), Val,Cond.getOperand(0));
17970 }
17971returnSDValue();
17972 }
17973caseRISCVISD::SELECT_CC: {
17974// Transform
17975SDValueLHS =N->getOperand(0);
17976SDValueRHS =N->getOperand(1);
17977SDValueCC =N->getOperand(2);
17978ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17979SDValue TrueV =N->getOperand(3);
17980SDValue FalseV =N->getOperand(4);
17981SDLocDL(N);
17982EVT VT =N->getValueType(0);
17983
17984// If the True and False values are the same, we don't need a select_cc.
17985if (TrueV == FalseV)
17986return TrueV;
17987
17988// (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17989// (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
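// Worked example (illustrative, not part of the original source), on RV64:
// (select (x < 0), 5, 2) becomes ((x >> 63) & (5 - 2)) + 2. When x is
// negative the arithmetic shift yields all-ones, the AND gives 3 and the
// result is 5; otherwise the shift yields 0 and the result is 2.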
17990if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17991 isa<ConstantSDNode>(FalseV) &&isNullConstant(RHS) &&
17992 (CCVal ==ISD::CondCode::SETLT || CCVal ==ISD::CondCode::SETGE)) {
17993if (CCVal ==ISD::CondCode::SETGE)
17994std::swap(TrueV, FalseV);
17995
17996 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17997 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17998// Only handle simm12; if the constant is not in this range, it can be
17999// treated as a register operand.
18000if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18001 isInt<12>(TrueSImm - FalseSImm)) {
18002SDValue SRA =
18003 DAG.getNode(ISD::SRA,DL, VT,LHS,
18004 DAG.getConstant(Subtarget.getXLen() - 1,DL, VT));
18005SDValue AND =
18006 DAG.getNode(ISD::AND,DL, VT, SRA,
18007 DAG.getSignedConstant(TrueSImm - FalseSImm,DL, VT));
18008return DAG.getNode(ISD::ADD,DL, VT, AND, FalseV);
18009 }
18010
18011if (CCVal ==ISD::CondCode::SETGE)
18012std::swap(TrueV, FalseV);
18013 }
18014
18015if (combine_CC(LHS,RHS,CC,DL, DAG, Subtarget))
18016return DAG.getNode(RISCVISD::SELECT_CC,DL,N->getValueType(0),
18017 {LHS, RHS, CC, TrueV, FalseV});
18018
18019if (!Subtarget.hasConditionalMoveFusion()) {
18020// (select c, -1, y) -> -c | y
18021if (isAllOnesConstant(TrueV)) {
18022SDValueC = DAG.getSetCC(DL, VT,LHS,RHS, CCVal);
18023SDValue Neg = DAG.getNegative(C,DL, VT);
18024return DAG.getNode(ISD::OR,DL, VT, Neg, FalseV);
18025 }
18026// (select c, y, -1) -> -!c | y
18027if (isAllOnesConstant(FalseV)) {
18028SDValueC =
18029 DAG.getSetCC(DL, VT,LHS,RHS,ISD::getSetCCInverse(CCVal, VT));
18030SDValue Neg = DAG.getNegative(C,DL, VT);
18031return DAG.getNode(ISD::OR,DL, VT, Neg, TrueV);
18032 }
18033
18034// (select c, 0, y) -> -!c & y
18035if (isNullConstant(TrueV)) {
18036SDValueC =
18037 DAG.getSetCC(DL, VT,LHS,RHS,ISD::getSetCCInverse(CCVal, VT));
18038SDValue Neg = DAG.getNegative(C,DL, VT);
18039return DAG.getNode(ISD::AND,DL, VT, Neg, FalseV);
18040 }
18041// (select c, y, 0) -> -c & y
18042if (isNullConstant(FalseV)) {
18043SDValueC = DAG.getSetCC(DL, VT,LHS,RHS, CCVal);
18044SDValue Neg = DAG.getNegative(C,DL, VT);
18045return DAG.getNode(ISD::AND,DL, VT, Neg, TrueV);
18046 }
18047// (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
18048// (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
18049if (((isOneConstant(FalseV) &&LHS == TrueV &&
18050 CCVal ==ISD::CondCode::SETNE) ||
18051 (isOneConstant(TrueV) &&LHS == FalseV &&
18052 CCVal ==ISD::CondCode::SETEQ)) &&
18053isNullConstant(RHS)) {
18054// freeze it to be safe.
18055LHS = DAG.getFreeze(LHS);
18056SDValueC = DAG.getSetCC(DL, VT,LHS,RHS,ISD::CondCode::SETEQ);
18057return DAG.getNode(ISD::ADD,DL, VT,LHS,C);
18058 }
18059 }
18060
18061// If both true/false are an xor with 1, pull through the select.
18062// This can occur after op legalization if both operands are setccs that
18063// require an xor to invert.
18064// FIXME: Generalize to other binary ops with identical operand?
18065if (TrueV.getOpcode() ==ISD::XOR && FalseV.getOpcode() ==ISD::XOR &&
18066 TrueV.getOperand(1) == FalseV.getOperand(1) &&
18067isOneConstant(TrueV.getOperand(1)) &&
18068 TrueV.hasOneUse() && FalseV.hasOneUse()) {
18069SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC,DL, VT,LHS,RHS,CC,
18070 TrueV.getOperand(0), FalseV.getOperand(0));
18071return DAG.getNode(ISD::XOR,DL, VT, NewSel, TrueV.getOperand(1));
18072 }
18073
18074returnSDValue();
18075 }
18076caseRISCVISD::BR_CC: {
18077SDValueLHS =N->getOperand(1);
18078SDValueRHS =N->getOperand(2);
18079SDValueCC =N->getOperand(3);
18080SDLocDL(N);
18081
18082if (combine_CC(LHS,RHS,CC,DL, DAG, Subtarget))
18083return DAG.getNode(RISCVISD::BR_CC,DL,N->getValueType(0),
18084N->getOperand(0),LHS,RHS,CC,N->getOperand(4));
18085
18086returnSDValue();
18087 }
18088caseISD::BITREVERSE:
18089returnperformBITREVERSECombine(N, DAG, Subtarget);
18090caseISD::FP_TO_SINT:
18091caseISD::FP_TO_UINT:
18092returnperformFP_TO_INTCombine(N, DCI, Subtarget);
18093caseISD::FP_TO_SINT_SAT:
18094caseISD::FP_TO_UINT_SAT:
18095returnperformFP_TO_INT_SATCombine(N, DCI, Subtarget);
18096caseISD::FCOPYSIGN: {
18097EVT VT =N->getValueType(0);
18098if (!VT.isVector())
18099break;
18100// There is a form of VFSGNJ which injects the negated sign of its second
18101// operand. Try to bubble any FNEG up after the extend/round to produce
18102// this optimized pattern. Avoid modifying cases where the FP_ROUND has
18103// TRUNC=1.
18104SDValue In2 =N->getOperand(1);
18105// Avoid cases where the extend/round has multiple uses, as duplicating
18106// those is typically more expensive than removing a fneg.
18107if (!In2.hasOneUse())
18108break;
18109if (In2.getOpcode() !=ISD::FP_EXTEND &&
18110 (In2.getOpcode() !=ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18111break;
18112 In2 = In2.getOperand(0);
18113if (In2.getOpcode() !=ISD::FNEG)
18114break;
18115SDLocDL(N);
18116SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0),DL, VT);
18117return DAG.getNode(ISD::FCOPYSIGN,DL, VT,N->getOperand(0),
18118 DAG.getNode(ISD::FNEG,DL, VT, NewFPExtRound));
18119 }
18120caseISD::MGATHER: {
18121constauto *MGN = cast<MaskedGatherSDNode>(N);
18122constEVT VT =N->getValueType(0);
18123SDValue Index = MGN->getIndex();
18124SDValue ScaleOp = MGN->getScale();
18125ISD::MemIndexType IndexType = MGN->getIndexType();
18126assert(!MGN->isIndexScaled() &&
18127"Scaled gather/scatter should not be formed");
18128
18129SDLocDL(N);
18130if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18131return DAG.getMaskedGather(
18132N->getVTList(), MGN->getMemoryVT(),DL,
18133 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18134 MGN->getBasePtr(), Index, ScaleOp},
18135 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18136
18137if (narrowIndex(Index, IndexType, DAG))
18138return DAG.getMaskedGather(
18139N->getVTList(), MGN->getMemoryVT(),DL,
18140 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18141 MGN->getBasePtr(), Index, ScaleOp},
18142 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18143
18144if (Index.getOpcode() ==ISD::BUILD_VECTOR &&
18145 MGN->getExtensionType() ==ISD::NON_EXTLOAD &&isTypeLegal(VT)) {
18146// The sequence will be XLenVT, not the type of Index. Tell
18147// isSimpleVIDSequence this so we avoid overflow.
18148if (std::optional<VIDSequence> SimpleVID =
18149isSimpleVIDSequence(Index, Subtarget.getXLen());
18150 SimpleVID && SimpleVID->StepDenominator == 1) {
18151const int64_t StepNumerator = SimpleVID->StepNumerator;
18152const int64_t Addend = SimpleVID->Addend;
18153
18154// Note: We don't need to check alignment here since (by assumption
18155// from the existence of the gather), our offsets must be sufficiently
18156// aligned.
18157
18158constEVT PtrVT =getPointerTy(DAG.getDataLayout());
18159assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18160assert(IndexType ==ISD::UNSIGNED_SCALED);
18161SDValue BasePtr = DAG.getNode(ISD::ADD,DL, PtrVT, MGN->getBasePtr(),
18162 DAG.getSignedConstant(Addend,DL, PtrVT));
18163
18164SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18165 VT.getVectorElementCount());
18166SDValue StridedLoad = DAG.getStridedLoadVP(
18167 VT,DL, MGN->getChain(), BasePtr,
18168 DAG.getSignedConstant(StepNumerator,DL, XLenVT), MGN->getMask(),
18169 EVL, MGN->getMemOperand());
18170SDValue VPSelect = DAG.getNode(ISD::VP_SELECT,DL, VT, MGN->getMask(),
18171 StridedLoad, MGN->getPassThru(), EVL);
18172return DAG.getMergeValues({VPSelect,SDValue(StridedLoad.getNode(), 1)},
18173DL);
18174 }
18175 }
18176
18177SmallVector<int> ShuffleMask;
18178if (MGN->getExtensionType() ==ISD::NON_EXTLOAD &&
18179matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18180SDValue Load = DAG.getMaskedLoad(VT,DL, MGN->getChain(),
18181 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18182 MGN->getMask(), DAG.getUNDEF(VT),
18183 MGN->getMemoryVT(), MGN->getMemOperand(),
18184ISD::UNINDEXED,ISD::NON_EXTLOAD);
18185SDValue Shuffle =
18186 DAG.getVectorShuffle(VT,DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18187return DAG.getMergeValues({Shuffle, Load.getValue(1)},DL);
18188 }
18189
18190if (MGN->getExtensionType() ==ISD::NON_EXTLOAD &&
18191matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18192 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18193SmallVector<SDValue> NewIndices;
18194for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18195 NewIndices.push_back(Index.getOperand(i));
18196EVT IndexVT = Index.getValueType()
18197 .getHalfNumVectorElementsVT(*DAG.getContext());
18198 Index = DAG.getBuildVector(IndexVT,DL, NewIndices);
18199
18200unsigned ElementSize = VT.getScalarStoreSize();
18201EVT WideScalarVT =MVT::getIntegerVT(ElementSize * 8 * 2);
18202auto EltCnt = VT.getVectorElementCount();
18203assert(EltCnt.isKnownEven() &&"Splitting vector, but not in half!");
18204EVT WideVT =EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18205 EltCnt.divideCoefficientBy(2));
18206SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18207EVT MaskVT =EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18208 EltCnt.divideCoefficientBy(2));
18209SDValue Mask = DAG.getSplat(MaskVT,DL, DAG.getConstant(1,DL, MVT::i1));
18210
18211SDValue Gather =
18212 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT,DL,
18213 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18214 Index, ScaleOp},
18215 MGN->getMemOperand(), IndexType,ISD::NON_EXTLOAD);
18216SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18217return DAG.getMergeValues({Result, Gather.getValue(1)},DL);
18218 }
18219break;
18220 }
18221caseISD::MSCATTER:{
18222constauto *MSN = cast<MaskedScatterSDNode>(N);
18223SDValue Index = MSN->getIndex();
18224SDValue ScaleOp = MSN->getScale();
18225ISD::MemIndexType IndexType = MSN->getIndexType();
18226assert(!MSN->isIndexScaled() &&
18227"Scaled gather/scatter should not be formed");
18228
18229SDLocDL(N);
18230if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18231return DAG.getMaskedScatter(
18232N->getVTList(), MSN->getMemoryVT(),DL,
18233 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18234 Index, ScaleOp},
18235 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18236
18237if (narrowIndex(Index, IndexType, DAG))
18238return DAG.getMaskedScatter(
18239N->getVTList(), MSN->getMemoryVT(),DL,
18240 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18241 Index, ScaleOp},
18242 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18243
18244EVT VT = MSN->getValue()->getValueType(0);
18245SmallVector<int> ShuffleMask;
18246if (!MSN->isTruncatingStore() &&
18247matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18248SDValue Shuffle = DAG.getVectorShuffle(VT,DL, MSN->getValue(),
18249 DAG.getUNDEF(VT), ShuffleMask);
18250return DAG.getMaskedStore(MSN->getChain(),DL, Shuffle, MSN->getBasePtr(),
18251 DAG.getUNDEF(XLenVT), MSN->getMask(),
18252 MSN->getMemoryVT(), MSN->getMemOperand(),
18253ISD::UNINDEXED,false);
18254 }
18255break;
18256 }
18257case ISD::VP_GATHER: {
18258constauto *VPGN = cast<VPGatherSDNode>(N);
18259SDValue Index = VPGN->getIndex();
18260SDValue ScaleOp = VPGN->getScale();
18261ISD::MemIndexType IndexType = VPGN->getIndexType();
18262assert(!VPGN->isIndexScaled() &&
18263"Scaled gather/scatter should not be formed");
18264
18265SDLocDL(N);
18266if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18267return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(),DL,
18268 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18269 ScaleOp, VPGN->getMask(),
18270 VPGN->getVectorLength()},
18271 VPGN->getMemOperand(), IndexType);
18272
18273if (narrowIndex(Index, IndexType, DAG))
18274return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(),DL,
18275 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18276 ScaleOp, VPGN->getMask(),
18277 VPGN->getVectorLength()},
18278 VPGN->getMemOperand(), IndexType);
18279
18280break;
18281 }
18282case ISD::VP_SCATTER: {
18283constauto *VPSN = cast<VPScatterSDNode>(N);
18284SDValue Index = VPSN->getIndex();
18285SDValue ScaleOp = VPSN->getScale();
18286ISD::MemIndexType IndexType = VPSN->getIndexType();
18287assert(!VPSN->isIndexScaled() &&
18288"Scaled gather/scatter should not be formed");
18289
18290SDLocDL(N);
18291if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18292return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(),DL,
18293 {VPSN->getChain(), VPSN->getValue(),
18294 VPSN->getBasePtr(), Index, ScaleOp,
18295 VPSN->getMask(), VPSN->getVectorLength()},
18296 VPSN->getMemOperand(), IndexType);
18297
18298if (narrowIndex(Index, IndexType, DAG))
18299return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(),DL,
18300 {VPSN->getChain(), VPSN->getValue(),
18301 VPSN->getBasePtr(), Index, ScaleOp,
18302 VPSN->getMask(), VPSN->getVectorLength()},
18303 VPSN->getMemOperand(), IndexType);
18304break;
18305 }
18306caseRISCVISD::SHL_VL:
18307if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18308return V;
18309 [[fallthrough]];
18310caseRISCVISD::SRA_VL:
18311caseRISCVISD::SRL_VL: {
18312SDValue ShAmt =N->getOperand(1);
18313if (ShAmt.getOpcode() ==RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18314// We don't need the upper 32 bits of a 64-bit element for a shift amount.
18315SDLocDL(N);
18316SDValue VL =N->getOperand(4);
18317EVT VT =N->getValueType(0);
18318 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, DAG.getUNDEF(VT),
18319 ShAmt.getOperand(1), VL);
18320return DAG.getNode(N->getOpcode(),DL, VT,N->getOperand(0), ShAmt,
18321N->getOperand(2),N->getOperand(3),N->getOperand(4));
18322 }
18323break;
18324 }
18325caseISD::SRA:
18326if (SDValue V =performSRACombine(N, DAG, Subtarget))
18327return V;
18328 [[fallthrough]];
18329caseISD::SRL:
18330caseISD::SHL: {
18331if (N->getOpcode() ==ISD::SHL) {
18332if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18333return V;
18334 }
18335SDValue ShAmt =N->getOperand(1);
18336if (ShAmt.getOpcode() ==RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18337// We don't need the upper 32 bits of a 64-bit element for a shift amount.
18338SDLocDL(N);
18339EVT VT =N->getValueType(0);
18340 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, DAG.getUNDEF(VT),
18341 ShAmt.getOperand(1),
18342 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18343return DAG.getNode(N->getOpcode(),DL, VT,N->getOperand(0), ShAmt);
18344 }
18345break;
18346 }
18347caseRISCVISD::ADD_VL:
18348if (SDValue V =combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18349return V;
18350returncombineToVWMACC(N, DAG, Subtarget);
18351caseRISCVISD::VWADD_W_VL:
18352caseRISCVISD::VWADDU_W_VL:
18353caseRISCVISD::VWSUB_W_VL:
18354caseRISCVISD::VWSUBU_W_VL:
18355returnperformVWADDSUBW_VLCombine(N, DCI, Subtarget);
18356caseRISCVISD::SUB_VL:
18357caseRISCVISD::MUL_VL:
18358returncombineOp_VLToVWOp_VL(N, DCI, Subtarget);
18359caseRISCVISD::VFMADD_VL:
18360caseRISCVISD::VFNMADD_VL:
18361caseRISCVISD::VFMSUB_VL:
18362caseRISCVISD::VFNMSUB_VL:
18363caseRISCVISD::STRICT_VFMADD_VL:
18364caseRISCVISD::STRICT_VFNMADD_VL:
18365caseRISCVISD::STRICT_VFMSUB_VL:
18366caseRISCVISD::STRICT_VFNMSUB_VL:
18367returnperformVFMADD_VLCombine(N, DCI, Subtarget);
18368caseRISCVISD::FADD_VL:
18369caseRISCVISD::FSUB_VL:
18370caseRISCVISD::FMUL_VL:
18371caseRISCVISD::VFWADD_W_VL:
18372caseRISCVISD::VFWSUB_W_VL:
18373returncombineOp_VLToVWOp_VL(N, DCI, Subtarget);
18374caseISD::LOAD:
18375caseISD::STORE: {
18376if (DCI.isAfterLegalizeDAG())
18377if (SDValue V =performMemPairCombine(N, DCI))
18378return V;
18379
18380if (N->getOpcode() !=ISD::STORE)
18381break;
18382
18383auto *Store = cast<StoreSDNode>(N);
18384SDValue Chain = Store->getChain();
18385EVT MemVT = Store->getMemoryVT();
18386SDValue Val = Store->getValue();
18387SDLocDL(N);
18388
18389bool IsScalarizable =
18390 MemVT.isFixedLengthVector() &&ISD::isNormalStore(Store) &&
18391 Store->isSimple() &&
18392 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18393isPowerOf2_64(MemVT.getSizeInBits()) &&
18394 MemVT.getSizeInBits() <= Subtarget.getXLen();
18395
18396// If sufficiently aligned we can scalarize stores of constant vectors of
18397// any power-of-two size up to XLen bits, provided that they aren't too
18398// expensive to materialize.
18399// vsetivli zero, 2, e8, m1, ta, ma
18400// vmv.v.i v8, 4
18401// vse64.v v8, (a0)
18402// ->
18403// li a1, 1028
18404// sh a1, 0(a0)
18405if (DCI.isBeforeLegalize() && IsScalarizable &&
18406ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18407// Get the constant vector bits
18408APInt NewC(Val.getValueSizeInBits(), 0);
18409uint64_t EltSize = Val.getScalarValueSizeInBits();
18410for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18411if (Val.getOperand(i).isUndef())
18412continue;
18413 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18414 i * EltSize);
18415 }
18416MVT NewVT =MVT::getIntegerVT(MemVT.getSizeInBits());
18417
18418if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18419true) <= 2 &&
18420allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18421 NewVT, *Store->getMemOperand())) {
18422SDValue NewV = DAG.getConstant(NewC,DL, NewVT);
18423return DAG.getStore(Chain,DL, NewV, Store->getBasePtr(),
18424 Store->getPointerInfo(), Store->getOriginalAlign(),
18425 Store->getMemOperand()->getFlags());
18426 }
18427 }
18428
18429// Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18430// vsetivli zero, 2, e16, m1, ta, ma
18431// vle16.v v8, (a0)
18432// vse16.v v8, (a1)
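// -> (illustrative, assuming a 32-bit total vector size)
// lw a2, 0(a0)
// sw a2, 0(a1)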
18433if (auto *L = dyn_cast<LoadSDNode>(Val);
18434 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18435 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18436 Store->getChain() ==SDValue(L, 1) &&ISD::isNormalLoad(L) &&
18437 L->getMemoryVT() == MemVT) {
18438MVT NewVT =MVT::getIntegerVT(MemVT.getSizeInBits());
18439if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18440 NewVT, *Store->getMemOperand()) &&
18441allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18442 NewVT, *L->getMemOperand())) {
18443SDValue NewL = DAG.getLoad(NewVT,DL, L->getChain(), L->getBasePtr(),
18444 L->getPointerInfo(), L->getOriginalAlign(),
18445 L->getMemOperand()->getFlags());
18446return DAG.getStore(Chain,DL, NewL, Store->getBasePtr(),
18447 Store->getPointerInfo(), Store->getOriginalAlign(),
18448 Store->getMemOperand()->getFlags());
18449 }
18450 }
18451
18452// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18453// vfmv.f.s is represented as extract element from 0. Match it late to avoid
18454// any illegal types.
18455if (Val.getOpcode() ==RISCVISD::VMV_X_S ||
18456 (DCI.isAfterLegalizeDAG() &&
18457 Val.getOpcode() ==ISD::EXTRACT_VECTOR_ELT &&
18458isNullConstant(Val.getOperand(1)))) {
18459SDValue Src = Val.getOperand(0);
18460MVT VecVT = Src.getSimpleValueType();
18461// VecVT should be scalable and memory VT should match the element type.
18462if (!Store->isIndexed() && VecVT.isScalableVector() &&
18463 MemVT == VecVT.getVectorElementType()) {
18464SDLocDL(N);
18465MVT MaskVT =getMaskTypeFor(VecVT);
18466return DAG.getStoreVP(
18467 Store->getChain(),DL, Src, Store->getBasePtr(), Store->getOffset(),
18468 DAG.getConstant(1,DL, MaskVT),
18469 DAG.getConstant(1,DL, Subtarget.getXLenVT()), MemVT,
18470 Store->getMemOperand(), Store->getAddressingMode(),
18471 Store->isTruncatingStore(),/*IsCompress*/false);
18472 }
18473 }
18474
18475break;
18476 }
18477caseISD::SPLAT_VECTOR: {
18478EVT VT =N->getValueType(0);
18479// Only perform this combine on legal MVT types.
18480if (!isTypeLegal(VT))
18481break;
18482if (auto Gather =matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(),N,
18483 DAG, Subtarget))
18484return Gather;
18485break;
18486 }
18487caseISD::BUILD_VECTOR:
18488if (SDValue V =performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18489return V;
18490break;
18491caseISD::CONCAT_VECTORS:
18492if (SDValue V =performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18493return V;
18494break;
18495caseISD::VECTOR_SHUFFLE:
18496if (SDValue V =performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18497return V;
18498break;
18499caseISD::INSERT_VECTOR_ELT:
18500if (SDValue V =performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18501return V;
18502break;
18503caseRISCVISD::VFMV_V_F_VL: {
18504constMVT VT =N->getSimpleValueType(0);
18505SDValue Passthru =N->getOperand(0);
18506SDValue Scalar =N->getOperand(1);
18507SDValue VL =N->getOperand(2);
18508
18509// If VL is 1, we can use vfmv.s.f.
18510if (isOneConstant(VL))
18511return DAG.getNode(RISCVISD::VFMV_S_F_VL,DL, VT, Passthru, Scalar, VL);
18512break;
18513 }
18514caseRISCVISD::VMV_V_X_VL: {
18515constMVT VT =N->getSimpleValueType(0);
18516SDValue Passthru =N->getOperand(0);
18517SDValue Scalar =N->getOperand(1);
18518SDValue VL =N->getOperand(2);
18519
18520// Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18521// scalar input.
18522unsigned ScalarSize = Scalar.getValueSizeInBits();
18523unsigned EltWidth = VT.getScalarSizeInBits();
18524if (ScalarSize > EltWidth && Passthru.isUndef())
18525if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18526returnSDValue(N, 0);
18527
18528// If VL is 1 and the scalar value won't benefit from an immediate, we can
18529// use vmv.s.x.
18530ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18531if (isOneConstant(VL) &&
18532 (!Const || Const->isZero() ||
18533 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18534return DAG.getNode(RISCVISD::VMV_S_X_VL,DL, VT, Passthru, Scalar, VL);
18535
18536break;
18537 }
18538caseRISCVISD::VFMV_S_F_VL: {
18539SDValue Src =N->getOperand(1);
18540// Try to remove vector->scalar->vector if the scalar->vector is inserting
18541// into an undef vector.
18542// TODO: Could use a vslide or vmv.v.v for non-undef.
18543if (N->getOperand(0).isUndef() &&
18544 Src.getOpcode() ==ISD::EXTRACT_VECTOR_ELT &&
18545isNullConstant(Src.getOperand(1)) &&
18546 Src.getOperand(0).getValueType().isScalableVector()) {
18547EVT VT =N->getValueType(0);
18548EVT SrcVT = Src.getOperand(0).getValueType();
18549assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18550// Widths match, just return the original vector.
18551if (SrcVT == VT)
18552return Src.getOperand(0);
18553// TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18554 }
18555 [[fallthrough]];
18556 }
18557caseRISCVISD::VMV_S_X_VL: {
18558constMVT VT =N->getSimpleValueType(0);
18559SDValue Passthru =N->getOperand(0);
18560SDValue Scalar =N->getOperand(1);
18561SDValue VL =N->getOperand(2);
18562
18563if (Scalar.getOpcode() ==RISCVISD::VMV_X_S && Passthru.isUndef() &&
18564 Scalar.getOperand(0).getValueType() ==N->getValueType(0))
18565return Scalar.getOperand(0);
18566
18567// Use M1 or smaller to avoid over-constraining register allocation.
18568constMVT M1VT =getLMUL1VT(VT);
18569if (M1VT.bitsLT(VT)) {
18570SDValue M1Passthru =
18571 DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, M1VT, Passthru,
18572 DAG.getVectorIdxConstant(0,DL));
18573SDValue Result =
18574 DAG.getNode(N->getOpcode(),DL, M1VT, M1Passthru, Scalar, VL);
18575 Result = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VT, Passthru, Result,
18576 DAG.getVectorIdxConstant(0,DL));
18577return Result;
18578 }
18579
18580// We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18581// higher would involve overly constraining the register allocator for
18582// no purpose.
18583if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18584 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18585 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18586return DAG.getNode(RISCVISD::VMV_V_X_VL,DL, VT, Passthru, Scalar, VL);
18587
18588break;
18589 }
18590caseRISCVISD::VMV_X_S: {
18591SDValue Vec =N->getOperand(0);
18592MVT VecVT =N->getOperand(0).getSimpleValueType();
18593constMVT M1VT =getLMUL1VT(VecVT);
18594if (M1VT.bitsLT(VecVT)) {
18595 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, M1VT, Vec,
18596 DAG.getVectorIdxConstant(0,DL));
18597return DAG.getNode(RISCVISD::VMV_X_S,DL,N->getSimpleValueType(0), Vec);
18598 }
18599break;
18600 }
18601caseISD::INTRINSIC_VOID:
18602caseISD::INTRINSIC_W_CHAIN:
18603caseISD::INTRINSIC_WO_CHAIN: {
18604unsigned IntOpNo =N->getOpcode() ==ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18605unsigned IntNo =N->getConstantOperandVal(IntOpNo);
18606switch (IntNo) {
18607// By default we do not combine any intrinsic.
18608default:
18609returnSDValue();
18610case Intrinsic::riscv_vcpop:
18611case Intrinsic::riscv_vcpop_mask:
18612case Intrinsic::riscv_vfirst:
18613case Intrinsic::riscv_vfirst_mask: {
18614SDValue VL =N->getOperand(2);
18615if (IntNo == Intrinsic::riscv_vcpop_mask ||
18616 IntNo == Intrinsic::riscv_vfirst_mask)
18617 VL =N->getOperand(3);
18618if (!isNullConstant(VL))
18619returnSDValue();
18620// If VL is 0, vcpop -> li 0, vfirst -> li -1.
18621SDLocDL(N);
18622EVT VT =N->getValueType(0);
18623if (IntNo == Intrinsic::riscv_vfirst ||
18624 IntNo == Intrinsic::riscv_vfirst_mask)
18625return DAG.getAllOnesConstant(DL, VT);
18626return DAG.getConstant(0,DL, VT);
18627 }
18628 }
18629 }
18630case ISD::EXPERIMENTAL_VP_REVERSE:
18631returnperformVP_REVERSECombine(N, DAG, Subtarget);
18632case ISD::VP_STORE:
18633returnperformVP_STORECombine(N, DAG, Subtarget);
18634caseISD::BITCAST: {
18635assert(Subtarget.useRVVForFixedLengthVectors());
18636SDValue N0 =N->getOperand(0);
18637EVT VT =N->getValueType(0);
18638EVT SrcVT = N0.getValueType();
18639if (VT.isRISCVVectorTuple() && N0->getOpcode() ==ISD::SPLAT_VECTOR) {
18640unsigned NF = VT.getRISCVVectorTupleNumFields();
18641unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18642SDValue EltVal = DAG.getConstant(0,DL, Subtarget.getXLenVT());
18643MVT ScalTy =MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18644
18645SDValueSplat = DAG.getNode(ISD::SPLAT_VECTOR,DL, ScalTy, EltVal);
18646
18647SDValue Result = DAG.getUNDEF(VT);
18648for (unsigned i = 0; i < NF; ++i)
18649 Result = DAG.getNode(RISCVISD::TUPLE_INSERT,DL, VT, Result,Splat,
18650 DAG.getVectorIdxConstant(i,DL));
18651return Result;
18652 }
18653// If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
18654// type, widen both sides to avoid a trip through memory.
18655if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18656 VT.isScalarInteger()) {
18657unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18658SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18659 Ops[0] = N0;
18660SDLocDL(N);
18661 N0 = DAG.getNode(ISD::CONCAT_VECTORS,DL, MVT::v8i1, Ops);
18662 N0 = DAG.getBitcast(MVT::i8, N0);
18663return DAG.getNode(ISD::TRUNCATE,DL, VT, N0);
18664 }
18665
18666returnSDValue();
18667 }
18668caseISD::CTPOP:
18669if (SDValue V =combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18670return V;
18671break;
18672 }
18673
18674returnSDValue();
18675}
18676
18677bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18678    EVT XVT, unsigned KeptBits) const {
18679// For vectors, we don't have a preference.
18680if (XVT.isVector())
18681returnfalse;
18682
18683if (XVT != MVT::i32 && XVT != MVT::i64)
18684returnfalse;
18685
18686// We can use sext.w for RV64 or an srai 31 on RV32.
18687if (KeptBits == 32 || KeptBits == 64)
18688returntrue;
18689
18690// With Zbb we can use sext.h/sext.b.
18691return Subtarget.hasStdExtZbb() &&
18692 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18693 KeptBits == 16);
18694}
18695
18696bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18697    const SDNode *N, CombineLevel Level) const {
18698assert((N->getOpcode() ==ISD::SHL ||N->getOpcode() ==ISD::SRA ||
18699N->getOpcode() ==ISD::SRL) &&
18700"Expected shift op");
18701
18702// The following folds are only desirable if `(OP _, c1 << c2)` can be
18703// materialised in fewer instructions than `(OP _, c1)`:
18704//
18705// (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18706// (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
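// For example (illustrative, not part of the original source):
// (shl (add x, 4), 2) -> (add (shl x, 2), 16); both 4 and 16 fit in a simm12
// add immediate, so the fold is allowed and may enable further combines.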
18707SDValue N0 =N->getOperand(0);
18708EVT Ty = N0.getValueType();
18709
18710// Loads/stores can fold the constant offset into their addressing, so when
18711// the ADD node is used by loads/stores, the folding optimization performed
18712// above can still be applied.
18713auto isUsedByLdSt = [](constSDNode *X,constSDNode *User) {
18714for (SDNode *Use :X->users()) {
18715// This use is the one we're on right now. Skip it
18716if (Use ==User ||Use->getOpcode() ==ISD::SELECT)
18717continue;
18718if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18719returnfalse;
18720 }
18721returntrue;
18722 };
18723
18724if (Ty.isScalarInteger() &&
18725 (N0.getOpcode() ==ISD::ADD || N0.getOpcode() ==ISD::OR)) {
18726if (N0.getOpcode() ==ISD::ADD && !N0->hasOneUse())
18727return isUsedByLdSt(N0.getNode(),N);
18728
18729auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18730auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18731
18732// Bail if we might break a sh{1,2,3}add pattern.
18733if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18734 C2->getZExtValue() <= 3 &&N->hasOneUse() &&
18735N->user_begin()->getOpcode() ==ISD::ADD &&
18736 !isUsedByLdSt(*N->user_begin(),nullptr) &&
18737 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18738returnfalse;
18739
18740if (C1 && C2) {
18741constAPInt &C1Int = C1->getAPIntValue();
18742APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18743
18744// We can materialise `c1 << c2` into an add immediate, so it's "free",
18745// and the combine should happen, to potentially allow further combines
18746// later.
18747if (ShiftedC1Int.getSignificantBits() <= 64 &&
18748isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18749returntrue;
18750
18751// We can materialise `c1` in an add immediate, so it's "free", and the
18752// combine should be prevented.
18753if (C1Int.getSignificantBits() <= 64 &&
18754isLegalAddImmediate(C1Int.getSExtValue()))
18755returnfalse;
18756
18757// Neither constant will fit into an immediate, so find materialisation
18758// costs.
18759int C1Cost =
18760RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18761/*CompressionCost*/true);
18762int ShiftedC1Cost =RISCVMatInt::getIntMatCost(
18763 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18764/*CompressionCost*/true);
18765
18766// Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18767// combine should be prevented.
18768if (C1Cost < ShiftedC1Cost)
18769returnfalse;
18770 }
18771 }
18772
18773if (!N0->hasOneUse())
18774returnfalse;
18775
18776if (N0->getOpcode() ==ISD::SIGN_EXTEND &&
18777 N0->getOperand(0)->getOpcode() ==ISD::ADD &&
18778 !N0->getOperand(0)->hasOneUse())
18779return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18780
18781returntrue;
18782}
18783
18784bool RISCVTargetLowering::targetShrinkDemandedConstant(
18785    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18786    TargetLoweringOpt &TLO) const {
18787// Delay this optimization as late as possible.
18788if (!TLO.LegalOps)
18789returnfalse;
18790
18791EVT VT =Op.getValueType();
18792if (VT.isVector())
18793returnfalse;
18794
18795unsigned Opcode =Op.getOpcode();
18796if (Opcode !=ISD::AND && Opcode !=ISD::OR && Opcode !=ISD::XOR)
18797returnfalse;
18798
18799ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18800if (!C)
18801returnfalse;
18802
18803constAPInt &Mask =C->getAPIntValue();
18804
18805// Clear all non-demanded bits initially.
18806APInt ShrunkMask = Mask &DemandedBits;
18807
18808// Try to make a smaller immediate by setting undemanded bits.
18809
18810APInt ExpandedMask = Mask |~DemandedBits;
18811
18812auto IsLegalMask = [ShrunkMask, ExpandedMask](constAPInt &Mask) ->bool {
18813return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18814 };
18815auto UseMask = [Mask,Op, &TLO](constAPInt &NewMask) ->bool {
18816if (NewMask == Mask)
18817returntrue;
18818SDLocDL(Op);
18819SDValue NewC = TLO.DAG.getConstant(NewMask,DL,Op.getValueType());
18820SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(),DL,Op.getValueType(),
18821Op.getOperand(0), NewC);
18822return TLO.CombineTo(Op, NewOp);
18823 };
18824
18825// If the shrunk mask fits in sign extended 12 bits, let the target
18826// independent code apply it.
18827if (ShrunkMask.isSignedIntN(12))
18828returnfalse;
18829
18830// AND has a few special cases for zext.
18831if (Opcode ==ISD::AND) {
18832// Preserve (and X, 0xffff), if zext.h exists use zext.h,
18833// otherwise use SLLI + SRLI.
18834APInt NewMask =APInt(Mask.getBitWidth(), 0xffff);
18835if (IsLegalMask(NewMask))
18836return UseMask(NewMask);
18837
18838// Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18839if (VT == MVT::i64) {
18840APInt NewMask =APInt(64, 0xffffffff);
18841if (IsLegalMask(NewMask))
18842return UseMask(NewMask);
18843 }
18844 }
18845
18846// For the remaining optimizations, we need to be able to make a negative
18847// number through a combination of mask and undemanded bits.
18848if (!ExpandedMask.isNegative())
18849returnfalse;
18850
18851// Compute the fewest number of bits needed to represent the negative number.
18852unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18853
18854// Try to make a 12 bit negative immediate. If that fails try to make a 32
18855// bit negative immediate unless the shrunk immediate already fits in 32 bits.
18856// If we can't create a simm12, we shouldn't change opaque constants.
18857APInt NewMask = ShrunkMask;
18858if (MinSignedBits <= 12)
18859 NewMask.setBitsFrom(11);
18860elseif (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18861 NewMask.setBitsFrom(31);
18862else
18863returnfalse;
18864
18865// Check that our new mask is a subset of the demanded mask.
18866assert(IsLegalMask(NewMask));
18867return UseMask(NewMask);
18868}
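// Worked example (illustrative, not part of the original source): for
// (and X, 0xFFF0) on i32 where only the low 16 bits are demanded, the
// undemanded upper bits can also be set, turning the mask into 0xFFFFFFF0
// (-16). That fits in a simm12, so the AND becomes a single andi instead of
// materialising 0xFFF0 separately.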
18869
18870static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18871staticconstuint64_t GREVMasks[] = {
18872 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18873 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18874
18875for (unsigned Stage = 0; Stage != 6; ++Stage) {
18876unsigned Shift = 1 << Stage;
18877if (ShAmt & Shift) {
18878uint64_t Mask = GREVMasks[Stage];
18879uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18880if (IsGORC)
18881 Res |= x;
18882 x = Res;
18883 }
18884 }
18885
18886return x;
18887}
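// Illustrative note (not part of the original source): with ShAmt == 7 the
// stages for shifts 1, 2 and 4 all fire. With IsGORC == false this reverses
// the bits within each byte (brev8); with IsGORC == true every byte that has
// any bit set becomes 0xFF (orc.b).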
18888
18889void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18890                                                        KnownBits &Known,
18891                                                        const APInt &DemandedElts,
18892                                                        const SelectionDAG &DAG,
18893                                                        unsigned Depth) const {
18894unsignedBitWidth = Known.getBitWidth();
18895unsigned Opc =Op.getOpcode();
18896assert((Opc >=ISD::BUILTIN_OP_END ||
18897 Opc ==ISD::INTRINSIC_WO_CHAIN ||
18898 Opc ==ISD::INTRINSIC_W_CHAIN ||
18899 Opc ==ISD::INTRINSIC_VOID) &&
18900"Should use MaskedValueIsZero if you don't know whether Op"
18901" is a target node!");
18902
18903 Known.resetAll();
18904switch (Opc) {
18905default:break;
18906caseRISCVISD::SELECT_CC: {
18907 Known = DAG.computeKnownBits(Op.getOperand(4),Depth + 1);
18908// If we don't know any bits, early out.
18909if (Known.isUnknown())
18910break;
18911KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3),Depth + 1);
18912
18913// Only known if known in both the LHS and RHS.
18914 Known = Known.intersectWith(Known2);
18915break;
18916 }
18917caseRISCVISD::CZERO_EQZ:
18918caseRISCVISD::CZERO_NEZ:
18919 Known = DAG.computeKnownBits(Op.getOperand(0),Depth + 1);
18920// Result is either all zero or operand 0. We can propagate zeros, but not
18921// ones.
18922 Known.One.clearAllBits();
18923break;
18924caseRISCVISD::REMUW: {
18925KnownBits Known2;
18926 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
18927 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
18928// We only care about the lower 32 bits.
18929 Known =KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18930// Restore the original width by sign extending.
18931 Known = Known.sext(BitWidth);
18932break;
18933 }
18934caseRISCVISD::DIVUW: {
18935KnownBits Known2;
18936 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
18937 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
18938// We only care about the lower 32 bits.
18939 Known =KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18940// Restore the original width by sign extending.
18941 Known = Known.sext(BitWidth);
18942break;
18943 }
18944caseRISCVISD::SLLW: {
18945KnownBits Known2;
18946 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
18947 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
18948 Known =KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18949// Restore the original width by sign extending.
18950 Known = Known.sext(BitWidth);
18951break;
18952 }
18953caseRISCVISD::CTZW: {
18954KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0),Depth + 1);
18955unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18956unsigned LowBits =llvm::bit_width(PossibleTZ);
18957 Known.Zero.setBitsFrom(LowBits);
18958break;
18959 }
18960caseRISCVISD::CLZW: {
18961KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0),Depth + 1);
18962unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18963unsigned LowBits =llvm::bit_width(PossibleLZ);
18964 Known.Zero.setBitsFrom(LowBits);
18965break;
18966 }
18967caseRISCVISD::BREV8:
18968caseRISCVISD::ORC_B: {
18969// FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18970// control value of 7 is equivalent to brev8 and orc.b.
18971 Known = DAG.computeKnownBits(Op.getOperand(0),Depth + 1);
18972bool IsGORC =Op.getOpcode() ==RISCVISD::ORC_B;
18973// To compute zeros, invert the known-zero mask, transform it, then invert the result back.
18974 Known.Zero =
18975 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18976 Known.One =computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18977break;
18978 }
18979caseRISCVISD::READ_VLENB: {
18980// We can use the minimum and maximum VLEN values to bound VLENB. We
18981// know VLEN must be a power of two.
18982constunsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18983constunsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18984assert(MinVLenB > 0 &&"READ_VLENB without vector extension enabled?");
18985 Known.Zero.setLowBits(Log2_32(MinVLenB));
18986 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18987if (MaxVLenB == MinVLenB)
18988 Known.One.setBit(Log2_32(MinVLenB));
18989break;
18990 }
18991caseRISCVISD::FCLASS: {
18992// fclass will only set one of the low 10 bits.
18993 Known.Zero.setBitsFrom(10);
18994break;
18995 }
18996caseISD::INTRINSIC_W_CHAIN:
18997caseISD::INTRINSIC_WO_CHAIN: {
18998unsigned IntNo =
18999Op.getConstantOperandVal(Opc ==ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
19000switch (IntNo) {
19001default:
19002// We can't do anything for most intrinsics.
19003break;
19004case Intrinsic::riscv_vsetvli:
19005case Intrinsic::riscv_vsetvlimax: {
19006bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
19007unsigned VSEW =Op.getConstantOperandVal(HasAVL + 1);
19008RISCVII::VLMUL VLMUL =
19009static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
19010unsigned SEW =RISCVVType::decodeVSEW(VSEW);
19011auto [LMul, Fractional] =RISCVVType::decodeVLMUL(VLMUL);
19012uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
19013 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
19014
19015// The result of vsetvli must not be larger than the AVL.
19016if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
19017 MaxVL = std::min(MaxVL,Op.getConstantOperandVal(1));
19018
19019unsigned KnownZeroFirstBit =Log2_32(MaxVL) + 1;
19020if (BitWidth > KnownZeroFirstBit)
19021 Known.Zero.setBitsFrom(KnownZeroFirstBit);
19022break;
19023 }
19024 }
19025break;
19026 }
19027 }
19028}
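
// Worked example for the vsetvli case above (illustrative only; the VLEN, SEW
// and LMUL values are assumed): with a maximum VLEN of 512, SEW=32 and LMUL=2,
// MaxVL = (512 / 32) * 2 = 32, so the returned VL needs at most
// Log2_32(32) + 1 = 6 bits and every bit from bit 6 upwards is known zero.
// A constant AVL smaller than 32 lowers MaxVL, and therefore the bound, further.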
19029
19030unsignedRISCVTargetLowering::ComputeNumSignBitsForTargetNode(
19031SDValueOp,constAPInt &DemandedElts,constSelectionDAG &DAG,
19032unsignedDepth) const{
19033switch (Op.getOpcode()) {
19034default:
19035break;
19036caseRISCVISD::SELECT_CC: {
19037unsigned Tmp =
19038 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts,Depth + 1);
19039if (Tmp == 1)return 1;// Early out.
19040unsigned Tmp2 =
19041 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts,Depth + 1);
19042return std::min(Tmp, Tmp2);
19043 }
19044caseRISCVISD::CZERO_EQZ:
19045caseRISCVISD::CZERO_NEZ:
19046// Output is either all zero or operand 0. We can propagate sign bit count
19047// from operand 0.
19048return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts,Depth + 1);
19049caseRISCVISD::ABSW: {
19050// We expand this at isel to negw+max. The result will have 33 sign bits
19051// if the input has at least 33 sign bits.
19052unsigned Tmp =
19053 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts,Depth + 1);
19054if (Tmp < 33)return 1;
19055return 33;
19056 }
19057caseRISCVISD::SLLW:
19058caseRISCVISD::SRAW:
19059caseRISCVISD::SRLW:
19060caseRISCVISD::DIVW:
19061caseRISCVISD::DIVUW:
19062caseRISCVISD::REMUW:
19063caseRISCVISD::ROLW:
19064caseRISCVISD::RORW:
19065caseRISCVISD::FCVT_W_RV64:
19066caseRISCVISD::FCVT_WU_RV64:
19067caseRISCVISD::STRICT_FCVT_W_RV64:
19068caseRISCVISD::STRICT_FCVT_WU_RV64:
19069// TODO: As the result is sign-extended, this is conservatively correct. A
19070// more precise answer could be calculated for SRAW depending on known
19071// bits in the shift amount.
19072return 33;
19073caseRISCVISD::VMV_X_S: {
19074// The number of sign bits of the scalar result is computed by obtaining the
19075// element type of the input vector operand, subtracting its width from the
19076// XLEN, and then adding one (sign bit within the element type). If the
19077// element type is wider than XLen, the least-significant XLEN bits are
19078// taken.
19079unsigned XLen = Subtarget.getXLen();
19080unsigned EltBits =Op.getOperand(0).getScalarValueSizeInBits();
19081if (EltBits <= XLen)
19082return XLen - EltBits + 1;
19083break;
19084 }
19085caseISD::INTRINSIC_W_CHAIN: {
19086unsigned IntNo =Op.getConstantOperandVal(1);
19087switch (IntNo) {
19088default:
19089break;
19090case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19091case Intrinsic::riscv_masked_atomicrmw_add_i64:
19092case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19093case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19094case Intrinsic::riscv_masked_atomicrmw_max_i64:
19095case Intrinsic::riscv_masked_atomicrmw_min_i64:
19096case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19097case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19098case Intrinsic::riscv_masked_cmpxchg_i64:
19099// riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19100// narrow atomic operation. These are implemented using atomic
19101// operations at the minimum supported atomicrmw/cmpxchg width whose
19102// result is then sign extended to XLEN. With +A, the minimum width is
19103// 32 bits for both RV64 and RV32.
19104assert(Subtarget.getXLen() == 64);
19105assert(getMinCmpXchgSizeInBits() == 32);
19106assert(Subtarget.hasStdExtA());
19107return 33;
19108 }
19109break;
19110 }
19111 }
19112
19113return 1;
19114}
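
// For the *W opcodes above, the result is sign-extended from bit 31 on RV64,
// so bits 63..31 all agree and at least 33 sign bits are known. For example
// (registers are illustrative only):
// ```
// divuw a0, a1, a2 # bits 63..31 of a0 are copies of bit 31
// ```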
19115
19116boolRISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
19117SDValueOp,constAPInt &DemandedElts,constSelectionDAG &DAG,
19118boolPoisonOnly,bool ConsiderFlags,unsignedDepth) const{
19119
19120// TODO: Add more target nodes.
19121switch (Op.getOpcode()) {
19122caseRISCVISD::SELECT_CC:
19123// Integer select_cc cannot create poison.
19124// TODO: What are the FP poison semantics?
19125// TODO: This instruction blocks poison from the unselected operand, can
19126// we do anything with that?
19127return !Op.getValueType().isInteger();
19128 }
19129returnTargetLowering::canCreateUndefOrPoisonForTargetNode(
19130Op, DemandedElts, DAG,PoisonOnly, ConsiderFlags,Depth);
19131}
19132
19133constConstant *
19134RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const{
19135assert(Ld &&"Unexpected null LoadSDNode");
19136if (!ISD::isNormalLoad(Ld))
19137returnnullptr;
19138
19139SDValuePtr = Ld->getBasePtr();
19140
19141// Only constant pools with no offset are supported.
19142auto GetSupportedConstantPool = [](SDValuePtr) ->ConstantPoolSDNode * {
19143auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19144if (!CNode || CNode->isMachineConstantPoolEntry() ||
19145 CNode->getOffset() != 0)
19146returnnullptr;
19147
19148return CNode;
19149 };
19150
19151// Simple case, LLA.
19152if (Ptr.getOpcode() ==RISCVISD::LLA) {
19153auto *CNode = GetSupportedConstantPool(Ptr);
19154if (!CNode || CNode->getTargetFlags() != 0)
19155returnnullptr;
19156
19157return CNode->getConstVal();
19158 }
19159
19160// Look for a HI and ADD_LO pair.
19161if (Ptr.getOpcode() !=RISCVISD::ADD_LO ||
19162Ptr.getOperand(0).getOpcode() !=RISCVISD::HI)
19163returnnullptr;
19164
19165auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19166auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19167
19168if (!CNodeLo || CNodeLo->getTargetFlags() !=RISCVII::MO_LO ||
19169 !CNodeHi || CNodeHi->getTargetFlags() !=RISCVII::MO_HI)
19170returnnullptr;
19171
19172if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19173returnnullptr;
19174
19175return CNodeLo->getConstVal();
19176}
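
// The HI/ADD_LO pair recognised above corresponds to the usual lui/addi
// constant-pool addressing sequence (illustrative only; the symbol name is
// assumed):
// ```
// lui a0, %hi(.LCPI0_0)
// addi a0, a0, %lo(.LCPI0_0) # RISCVISD::ADD_LO(RISCVISD::HI(CP), CP)
// fld fa0, 0(a0)
// ```
// Both relocations must name the same constant pool entry with a zero offset
// for the load to be folded to the underlying Constant.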
19177
19178staticMachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
19179MachineBasicBlock *BB) {
19180assert(MI.getOpcode() == RISCV::ReadCounterWide &&"Unexpected instruction");
19181
19182// To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19183// Should the count have wrapped while it was being read, we need to try
19184// again.
19185// For example:
19186// ```
19187// read:
19188// csrrs x3, counterh # load high word of counter
19189// csrrs x2, counter # load low word of counter
19190// csrrs x4, counterh # load high word of counter
19191// bne x3, x4, read # check if high word reads match, otherwise try again
19192// ```
19193
19194MachineFunction &MF = *BB->getParent();
19195constBasicBlock *LLVMBB = BB->getBasicBlock();
19196MachineFunction::iterator It = ++BB->getIterator();
19197
19198MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19199 MF.insert(It, LoopMBB);
19200
19201MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19202 MF.insert(It, DoneMBB);
19203
19204// Transfer the remainder of BB and its successor edges to DoneMBB.
19205 DoneMBB->splice(DoneMBB->begin(), BB,
19206 std::next(MachineBasicBlock::iterator(MI)), BB->end());
19207 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
19208
19209 BB->addSuccessor(LoopMBB);
19210
19211MachineRegisterInfo &RegInfo = MF.getRegInfo();
19212Register ReadAgainReg =RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19213Register LoReg =MI.getOperand(0).getReg();
19214Register HiReg =MI.getOperand(1).getReg();
19215 int64_t LoCounter =MI.getOperand(2).getImm();
19216 int64_t HiCounter =MI.getOperand(3).getImm();
19217DebugLocDL =MI.getDebugLoc();
19218
19219constTargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
19220BuildMI(LoopMBB,DL,TII->get(RISCV::CSRRS), HiReg)
19221 .addImm(HiCounter)
19222 .addReg(RISCV::X0);
19223BuildMI(LoopMBB,DL,TII->get(RISCV::CSRRS), LoReg)
19224 .addImm(LoCounter)
19225 .addReg(RISCV::X0);
19226BuildMI(LoopMBB,DL,TII->get(RISCV::CSRRS), ReadAgainReg)
19227 .addImm(HiCounter)
19228 .addReg(RISCV::X0);
19229
19230BuildMI(LoopMBB,DL,TII->get(RISCV::BNE))
19231 .addReg(HiReg)
19232 .addReg(ReadAgainReg)
19233 .addMBB(LoopMBB);
19234
19235 LoopMBB->addSuccessor(LoopMBB);
19236 LoopMBB->addSuccessor(DoneMBB);
19237
19238MI.eraseFromParent();
19239
19240return DoneMBB;
19241}
19242
19243staticMachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
19244MachineBasicBlock *BB,
19245constRISCVSubtarget &Subtarget) {
19246assert(MI.getOpcode() == RISCV::SplitF64Pseudo &&"Unexpected instruction");
19247
19248MachineFunction &MF = *BB->getParent();
19249DebugLocDL =MI.getDebugLoc();
19250constTargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
19251constTargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
19252Register LoReg =MI.getOperand(0).getReg();
19253Register HiReg =MI.getOperand(1).getReg();
19254Register SrcReg =MI.getOperand(2).getReg();
19255
19256constTargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
19257int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19258
19259TII.storeRegToStackSlot(*BB,MI, SrcReg,MI.getOperand(2).isKill(), FI, SrcRC,
19260 RI,Register());
19261MachinePointerInfo MPI =MachinePointerInfo::getFixedStack(MF, FI);
19262MachineMemOperand *MMOLo =
19263 MF.getMachineMemOperand(MPI,MachineMemOperand::MOLoad, 4,Align(8));
19264MachineMemOperand *MMOHi = MF.getMachineMemOperand(
19265 MPI.getWithOffset(4),MachineMemOperand::MOLoad, 4,Align(8));
19266BuildMI(*BB,MI,DL,TII.get(RISCV::LW), LoReg)
19267 .addFrameIndex(FI)
19268 .addImm(0)
19269 .addMemOperand(MMOLo);
19270BuildMI(*BB,MI,DL,TII.get(RISCV::LW), HiReg)
19271 .addFrameIndex(FI)
19272 .addImm(4)
19273 .addMemOperand(MMOHi);
19274MI.eraseFromParent();// The pseudo instruction is gone now.
19275return BB;
19276}
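
// The emitted sequence for SplitF64Pseudo is, in effect (illustrative only;
// registers and the frame-index address are assumed):
// ```
// fsd fa0, 0(sp) # spill the FPR64 source to the MoveF64 stack slot
// lw a0, 0(sp) # low 32 bits
// lw a1, 4(sp) # high 32 bits
// ```
// i.e. the double is round-tripped through memory because a direct
// FPR64-to-GPR-pair move is not used here.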
19277
19278staticMachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
19279MachineBasicBlock *BB,
19280constRISCVSubtarget &Subtarget) {
19281assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19282"Unexpected instruction");
19283
19284MachineFunction &MF = *BB->getParent();
19285DebugLocDL =MI.getDebugLoc();
19286constTargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
19287constTargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
19288Register DstReg =MI.getOperand(0).getReg();
19289Register LoReg =MI.getOperand(1).getReg();
19290Register HiReg =MI.getOperand(2).getReg();
19291
19292constTargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19293int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19294
19295MachinePointerInfo MPI =MachinePointerInfo::getFixedStack(MF, FI);
19296MachineMemOperand *MMOLo =
19297 MF.getMachineMemOperand(MPI,MachineMemOperand::MOStore, 4,Align(8));
19298MachineMemOperand *MMOHi = MF.getMachineMemOperand(
19299 MPI.getWithOffset(4),MachineMemOperand::MOStore, 4,Align(8));
19300BuildMI(*BB,MI,DL,TII.get(RISCV::SW))
19301 .addReg(LoReg,getKillRegState(MI.getOperand(1).isKill()))
19302 .addFrameIndex(FI)
19303 .addImm(0)
19304 .addMemOperand(MMOLo);
19305BuildMI(*BB,MI,DL,TII.get(RISCV::SW))
19306 .addReg(HiReg,getKillRegState(MI.getOperand(2).isKill()))
19307 .addFrameIndex(FI)
19308 .addImm(4)
19309 .addMemOperand(MMOHi);
19310TII.loadRegFromStackSlot(*BB,MI, DstReg, FI, DstRC, RI,Register());
19311MI.eraseFromParent();// The pseudo instruction is gone now.
19312return BB;
19313}
19314
19315staticboolisSelectPseudo(MachineInstr &MI) {
19316switch (MI.getOpcode()) {
19317default:
19318returnfalse;
19319case RISCV::Select_GPR_Using_CC_GPR:
19320case RISCV::Select_GPR_Using_CC_Imm:
19321case RISCV::Select_FPR16_Using_CC_GPR:
19322case RISCV::Select_FPR16INX_Using_CC_GPR:
19323case RISCV::Select_FPR32_Using_CC_GPR:
19324case RISCV::Select_FPR32INX_Using_CC_GPR:
19325case RISCV::Select_FPR64_Using_CC_GPR:
19326case RISCV::Select_FPR64INX_Using_CC_GPR:
19327case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19328returntrue;
19329 }
19330}
19331
19332staticMachineBasicBlock *emitQuietFCMP(MachineInstr &MI,MachineBasicBlock *BB,
19333unsigned RelOpcode,unsigned EqOpcode,
19334constRISCVSubtarget &Subtarget) {
19335DebugLocDL =MI.getDebugLoc();
19336Register DstReg =MI.getOperand(0).getReg();
19337Register Src1Reg =MI.getOperand(1).getReg();
19338Register Src2Reg =MI.getOperand(2).getReg();
19339MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19340Register SavedFFlags =MRI.createVirtualRegister(&RISCV::GPRRegClass);
19341constTargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19342
19343// Save the current FFLAGS.
19344BuildMI(*BB,MI,DL,TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19345
19346auto MIB =BuildMI(*BB,MI,DL,TII.get(RelOpcode), DstReg)
19347 .addReg(Src1Reg)
19348 .addReg(Src2Reg);
19349if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19350 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19351
19352// Restore the FFLAGS.
19353BuildMI(*BB,MI,DL,TII.get(RISCV::WriteFFLAGS))
19354 .addReg(SavedFFlags,RegState::Kill);
19355
19356// Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
19357auto MIB2 =BuildMI(*BB,MI,DL,TII.get(EqOpcode), RISCV::X0)
19358 .addReg(Src1Reg,getKillRegState(MI.getOperand(1).isKill()))
19359 .addReg(Src2Reg,getKillRegState(MI.getOperand(2).isKill()));
19360if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19361 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
19362
19363// Erase the pseudoinstruction.
19364MI.eraseFromParent();
19365return BB;
19366}
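
// A quiet compare pseudo such as PseudoQuietFLT_S therefore expands to
// roughly the following (illustrative only; register names are assumed):
// ```
// frflags t0 # save the accrued exception flags
// flt.s a0, fa0, fa1 # signaling compare; may spuriously raise NV on quiet NaNs
// fsflags t0 # restore the saved flags
// feq.s x0, fa0, fa1 # dummy quiet compare; raises NV only for signaling NaNs
// ```
// so the final fflags reflect only the invalid-operation behaviour required of
// a quiet comparison.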
19367
19368staticMachineBasicBlock *
19369EmitLoweredCascadedSelect(MachineInstr &First,MachineInstr &Second,
19370MachineBasicBlock *ThisMBB,
19371constRISCVSubtarget &Subtarget) {
19372// Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19373// Without this, custom-inserter would have generated:
19374//
19375// A
19376// | \
19377 // | B
19378// | /
19379// C
19380// | \
19381 // | D
19382// | /
19383// E
19384//
19385// A: X = ...; Y = ...
19386// B: empty
19387// C: Z = PHI [X, A], [Y, B]
19388// D: empty
19389// E: PHI [X, C], [Z, D]
19390//
19391// If we lower both Select_FPRX_ in a single step, we can instead generate:
19392//
19393// A
19394// | \
19395 // | C
19396// | /|
19397// |/ |
19398// | |
19399// | D
19400// | /
19401// E
19402//
19403// A: X = ...; Y = ...
19404// D: empty
19405// E: PHI [X, A], [X, C], [Y, D]
19406
19407constRISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19408constDebugLoc &DL =First.getDebugLoc();
19409constBasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19410MachineFunction *F = ThisMBB->getParent();
19411MachineBasicBlock *FirstMBB =F->CreateMachineBasicBlock(LLVM_BB);
19412MachineBasicBlock *SecondMBB =F->CreateMachineBasicBlock(LLVM_BB);
19413MachineBasicBlock *SinkMBB =F->CreateMachineBasicBlock(LLVM_BB);
19414MachineFunction::iterator It = ++ThisMBB->getIterator();
19415F->insert(It, FirstMBB);
19416F->insert(It, SecondMBB);
19417F->insert(It, SinkMBB);
19418
19419// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19420 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19421 std::next(MachineBasicBlock::iterator(First)),
19422 ThisMBB->end());
19423 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19424
19425// Fallthrough block for ThisMBB.
19426 ThisMBB->addSuccessor(FirstMBB);
19427// Fallthrough block for FirstMBB.
19428 FirstMBB->addSuccessor(SecondMBB);
19429 ThisMBB->addSuccessor(SinkMBB);
19430 FirstMBB->addSuccessor(SinkMBB);
19431// This is fallthrough.
19432 SecondMBB->addSuccessor(SinkMBB);
19433
19434auto FirstCC =static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19435Register FLHS =First.getOperand(1).getReg();
19436Register FRHS =First.getOperand(2).getReg();
19437// Insert appropriate branch.
19438BuildMI(FirstMBB,DL,TII.getBrCond(FirstCC))
19439 .addReg(FLHS)
19440 .addReg(FRHS)
19441 .addMBB(SinkMBB);
19442
19443Register SLHS = Second.getOperand(1).getReg();
19444Register SRHS = Second.getOperand(2).getReg();
19445Register Op1Reg4 =First.getOperand(4).getReg();
19446Register Op1Reg5 =First.getOperand(5).getReg();
19447
19448auto SecondCC =static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19449// Insert appropriate branch.
19450BuildMI(ThisMBB,DL,TII.getBrCond(SecondCC))
19451 .addReg(SLHS)
19452 .addReg(SRHS)
19453 .addMBB(SinkMBB);
19454
19455Register DestReg = Second.getOperand(0).getReg();
19456Register Op2Reg4 = Second.getOperand(4).getReg();
19457BuildMI(*SinkMBB, SinkMBB->begin(),DL,TII.get(RISCV::PHI), DestReg)
19458 .addReg(Op2Reg4)
19459 .addMBB(ThisMBB)
19460 .addReg(Op1Reg4)
19461 .addMBB(FirstMBB)
19462 .addReg(Op1Reg5)
19463 .addMBB(SecondMBB);
19464
19465// Now remove the Select_FPRX_s.
19466First.eraseFromParent();
19467 Second.eraseFromParent();
19468return SinkMBB;
19469}
19470
19471staticMachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19472MachineBasicBlock *BB,
19473constRISCVSubtarget &Subtarget) {
19474// To "insert" Select_* instructions, we actually have to insert the triangle
19475// control-flow pattern. The incoming instructions know the destination vreg
19476// to set, the operands to compare for the branch, the true/false values to
19477// select between, and the condition code used to pick the appropriate branch.
19478//
19479// We produce the following control flow:
19480// HeadMBB
19481// | \
19482//           | IfFalseMBB
19483// | /
19484// TailMBB
19485//
19486// When we find a sequence of selects we attempt to optimize their emission
19487// by sharing the control flow. Currently we only handle cases where we have
19488// multiple selects with the exact same condition (same LHS, RHS and CC).
19489// The selects may be interleaved with other instructions if the other
19490// instructions meet some requirements we deem safe:
19491// - They are not pseudo instructions.
19492// - They are debug instructions. Otherwise,
19493// - They do not have side-effects, do not access memory and their inputs do
19494// not depend on the results of the select pseudo-instructions.
19495// The TrueV/FalseV operands of the selects cannot depend on the result of
19496// previous selects in the sequence.
19497// These conditions could be further relaxed. See the X86 target for a
19498// related approach and more information.
19499//
19500// Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19501// is checked here and handled by a separate function -
19502// EmitLoweredCascadedSelect.
19503
19504auto Next =next_nodbg(MI.getIterator(), BB->instr_end());
19505if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19506MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19507 Next != BB->end() && Next->getOpcode() ==MI.getOpcode() &&
19508 Next->getOperand(5).getReg() ==MI.getOperand(0).getReg() &&
19509 Next->getOperand(5).isKill())
19510returnEmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19511
19512RegisterLHS =MI.getOperand(1).getReg();
19513RegisterRHS;
19514if (MI.getOperand(2).isReg())
19515RHS =MI.getOperand(2).getReg();
19516autoCC =static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19517
19518SmallVector<MachineInstr *, 4> SelectDebugValues;
19519SmallSet<Register, 4> SelectDests;
19520 SelectDests.insert(MI.getOperand(0).getReg());
19521
19522MachineInstr *LastSelectPseudo = &MI;
19523for (auto E = BB->end(), SequenceMBBI =MachineBasicBlock::iterator(MI);
19524 SequenceMBBI != E; ++SequenceMBBI) {
19525if (SequenceMBBI->isDebugInstr())
19526continue;
19527if (isSelectPseudo(*SequenceMBBI)) {
19528if (SequenceMBBI->getOperand(1).getReg() !=LHS ||
19529 !SequenceMBBI->getOperand(2).isReg() ||
19530 SequenceMBBI->getOperand(2).getReg() !=RHS ||
19531 SequenceMBBI->getOperand(3).getImm() !=CC ||
19532 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19533 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19534break;
19535 LastSelectPseudo = &*SequenceMBBI;
19536 SequenceMBBI->collectDebugValues(SelectDebugValues);
19537 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19538continue;
19539 }
19540if (SequenceMBBI->hasUnmodeledSideEffects() ||
19541 SequenceMBBI->mayLoadOrStore() ||
19542 SequenceMBBI->usesCustomInsertionHook())
19543break;
19544if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19545 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19546 }))
19547break;
19548 }
19549
19550constRISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19551constBasicBlock *LLVM_BB = BB->getBasicBlock();
19552DebugLocDL =MI.getDebugLoc();
19553MachineFunction::iteratorI = ++BB->getIterator();
19554
19555MachineBasicBlock *HeadMBB = BB;
19556MachineFunction *F = BB->getParent();
19557MachineBasicBlock *TailMBB =F->CreateMachineBasicBlock(LLVM_BB);
19558MachineBasicBlock *IfFalseMBB =F->CreateMachineBasicBlock(LLVM_BB);
19559
19560F->insert(I, IfFalseMBB);
19561F->insert(I, TailMBB);
19562
19563// Set the call frame size on entry to the new basic blocks.
19564unsigned CallFrameSize =TII.getCallFrameSizeAt(*LastSelectPseudo);
19565 IfFalseMBB->setCallFrameSize(CallFrameSize);
19566 TailMBB->setCallFrameSize(CallFrameSize);
19567
19568// Transfer debug instructions associated with the selects to TailMBB.
19569for (MachineInstr *DebugInstr : SelectDebugValues) {
19570 TailMBB->push_back(DebugInstr->removeFromParent());
19571 }
19572
19573// Move all instructions after the sequence to TailMBB.
19574 TailMBB->splice(TailMBB->end(), HeadMBB,
19575 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19576// Update machine-CFG edges by transferring all successors of the current
19577// block to the new block which will contain the Phi nodes for the selects.
19578 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19579// Set the successors for HeadMBB.
19580 HeadMBB->addSuccessor(IfFalseMBB);
19581 HeadMBB->addSuccessor(TailMBB);
19582
19583// Insert appropriate branch.
19584if (MI.getOperand(2).isImm())
19585BuildMI(HeadMBB,DL,TII.getBrCond(CC,MI.getOperand(2).isImm()))
19586 .addReg(LHS)
19587 .addImm(MI.getOperand(2).getImm())
19588 .addMBB(TailMBB);
19589else
19590BuildMI(HeadMBB,DL,TII.getBrCond(CC))
19591 .addReg(LHS)
19592 .addReg(RHS)
19593 .addMBB(TailMBB);
19594
19595// IfFalseMBB just falls through to TailMBB.
19596 IfFalseMBB->addSuccessor(TailMBB);
19597
19598// Create PHIs for all of the select pseudo-instructions.
19599auto SelectMBBI =MI.getIterator();
19600auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19601autoInsertionPoint = TailMBB->begin();
19602while (SelectMBBI != SelectEnd) {
19603auto Next = std::next(SelectMBBI);
19604if (isSelectPseudo(*SelectMBBI)) {
19605// %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19606BuildMI(*TailMBB,InsertionPoint, SelectMBBI->getDebugLoc(),
19607TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19608 .addReg(SelectMBBI->getOperand(4).getReg())
19609 .addMBB(HeadMBB)
19610 .addReg(SelectMBBI->getOperand(5).getReg())
19611 .addMBB(IfFalseMBB);
19612 SelectMBBI->eraseFromParent();
19613 }
19614 SelectMBBI = Next;
19615 }
19616
19617F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19618return TailMBB;
19619}
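
// For a single select pseudo the emitted triangle looks like (illustrative
// only; virtual register names are assumed):
// ```
// HeadMBB:
// bCC %lhs, %rhs, TailMBB # taken when the select condition holds
// IfFalseMBB: # fallthrough, empty
// TailMBB:
// %res = PHI [%trueval, HeadMBB], [%falseval, IfFalseMBB]
// ```
// A run of selects sharing the same LHS, RHS and CC reuses one branch and gets
// one PHI per select in TailMBB.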
19620
19621// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19622staticconstRISCV::RISCVMaskedPseudoInfo *
19623lookupMaskedIntrinsic(uint16_t MCOpcode,RISCVII::VLMUL LMul,unsigned SEW) {
19624constRISCVVInversePseudosTable::PseudoInfo *Inverse =
19625 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19626assert(Inverse &&"Unexpected LMUL and SEW pair for instruction");
19627constRISCV::RISCVMaskedPseudoInfo *Masked =
19628 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19629assert(Masked &&"Could not find masked instruction for LMUL and SEW pair");
19630returnMasked;
19631}
19632
19633staticMachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19634MachineBasicBlock *BB,
19635unsigned CVTXOpc) {
19636DebugLocDL =MI.getDebugLoc();
19637
19638constTargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19639
19640MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19641Register SavedFFLAGS =MRI.createVirtualRegister(&RISCV::GPRRegClass);
19642
19643// Save the old value of FFLAGS.
19644BuildMI(*BB,MI,DL,TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19645
19646assert(MI.getNumOperands() == 7);
19647
19648// Emit a VFCVT_X_F
19649constTargetRegisterInfo *TRI =
19650 BB->getParent()->getSubtarget().getRegisterInfo();
19651constTargetRegisterClass *RC =MI.getRegClassConstraint(0, &TII,TRI);
19652Register Tmp =MRI.createVirtualRegister(RC);
19653BuildMI(*BB,MI,DL,TII.get(CVTXOpc), Tmp)
19654 .add(MI.getOperand(1))
19655 .add(MI.getOperand(2))
19656 .add(MI.getOperand(3))
19657 .add(MachineOperand::CreateImm(7))// frm = DYN
19658 .add(MI.getOperand(4))
19659 .add(MI.getOperand(5))
19660 .add(MI.getOperand(6))
19661 .add(MachineOperand::CreateReg(RISCV::FRM,
19662/*IsDef*/false,
19663/*IsImp*/true));
19664
19665// Emit a VFCVT_F_X
19666RISCVII::VLMUL LMul =RISCVII::getLMul(MI.getDesc().TSFlags);
19667unsigned Log2SEW =MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19668// There is no E8 variant for VFCVT_F_X.
19669assert(Log2SEW >= 4);
19670unsigned CVTFOpc =
19671lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19672 ->MaskedPseudo;
19673
19674BuildMI(*BB,MI,DL,TII.get(CVTFOpc))
19675 .add(MI.getOperand(0))
19676 .add(MI.getOperand(1))
19677 .addReg(Tmp)
19678 .add(MI.getOperand(3))
19679 .add(MachineOperand::CreateImm(7))// frm = DYN
19680 .add(MI.getOperand(4))
19681 .add(MI.getOperand(5))
19682 .add(MI.getOperand(6))
19683 .add(MachineOperand::CreateReg(RISCV::FRM,
19684/*IsDef*/false,
19685/*IsImp*/true));
19686
19687// Restore FFLAGS.
19688BuildMI(*BB,MI,DL,TII.get(RISCV::WriteFFLAGS))
19689 .addReg(SavedFFLAGS,RegState::Kill);
19690
19691// Erase the pseudoinstruction.
19692MI.eraseFromParent();
19693return BB;
19694}
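
// In vector assembly terms the masked VFROUND_NOEXCEPT expansion above is
// roughly (illustrative only; register names are assumed):
// ```
// frflags t0 # save fflags
// vfcvt.x.f.v v8, v8, v0.t # to integer, dynamic rounding mode
// vfcvt.f.x.v v8, v8, v0.t # back to FP, dynamic rounding mode
// fsflags t0 # restore fflags
// ```
// which rounds each active element to an integral value without leaving any
// exception flags behind.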
19695
19696staticMachineBasicBlock *emitFROUND(MachineInstr &MI,MachineBasicBlock *MBB,
19697constRISCVSubtarget &Subtarget) {
19698unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19699constTargetRegisterClass *RC;
19700switch (MI.getOpcode()) {
19701default:
19702llvm_unreachable("Unexpected opcode");
19703case RISCV::PseudoFROUND_H:
19704 CmpOpc = RISCV::FLT_H;
19705 F2IOpc = RISCV::FCVT_W_H;
19706 I2FOpc = RISCV::FCVT_H_W;
19707 FSGNJOpc = RISCV::FSGNJ_H;
19708 FSGNJXOpc = RISCV::FSGNJX_H;
19709 RC = &RISCV::FPR16RegClass;
19710break;
19711case RISCV::PseudoFROUND_H_INX:
19712 CmpOpc = RISCV::FLT_H_INX;
19713 F2IOpc = RISCV::FCVT_W_H_INX;
19714 I2FOpc = RISCV::FCVT_H_W_INX;
19715 FSGNJOpc = RISCV::FSGNJ_H_INX;
19716 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19717 RC = &RISCV::GPRF16RegClass;
19718break;
19719case RISCV::PseudoFROUND_S:
19720 CmpOpc = RISCV::FLT_S;
19721 F2IOpc = RISCV::FCVT_W_S;
19722 I2FOpc = RISCV::FCVT_S_W;
19723 FSGNJOpc = RISCV::FSGNJ_S;
19724 FSGNJXOpc = RISCV::FSGNJX_S;
19725 RC = &RISCV::FPR32RegClass;
19726break;
19727case RISCV::PseudoFROUND_S_INX:
19728 CmpOpc = RISCV::FLT_S_INX;
19729 F2IOpc = RISCV::FCVT_W_S_INX;
19730 I2FOpc = RISCV::FCVT_S_W_INX;
19731 FSGNJOpc = RISCV::FSGNJ_S_INX;
19732 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19733 RC = &RISCV::GPRF32RegClass;
19734break;
19735case RISCV::PseudoFROUND_D:
19736assert(Subtarget.is64Bit() &&"Expected 64-bit GPR.");
19737 CmpOpc = RISCV::FLT_D;
19738 F2IOpc = RISCV::FCVT_L_D;
19739 I2FOpc = RISCV::FCVT_D_L;
19740 FSGNJOpc = RISCV::FSGNJ_D;
19741 FSGNJXOpc = RISCV::FSGNJX_D;
19742 RC = &RISCV::FPR64RegClass;
19743break;
19744case RISCV::PseudoFROUND_D_INX:
19745assert(Subtarget.is64Bit() &&"Expected 64-bit GPR.");
19746 CmpOpc = RISCV::FLT_D_INX;
19747 F2IOpc = RISCV::FCVT_L_D_INX;
19748 I2FOpc = RISCV::FCVT_D_L_INX;
19749 FSGNJOpc = RISCV::FSGNJ_D_INX;
19750 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19751 RC = &RISCV::GPRRegClass;
19752break;
19753 }
19754
19755constBasicBlock *BB =MBB->getBasicBlock();
19756DebugLocDL =MI.getDebugLoc();
19757MachineFunction::iteratorI = ++MBB->getIterator();
19758
19759MachineFunction *F =MBB->getParent();
19760MachineBasicBlock *CvtMBB =F->CreateMachineBasicBlock(BB);
19761MachineBasicBlock *DoneMBB =F->CreateMachineBasicBlock(BB);
19762
19763F->insert(I, CvtMBB);
19764F->insert(I, DoneMBB);
19765// Move all instructions after the sequence to DoneMBB.
19766 DoneMBB->splice(DoneMBB->end(),MBB,MachineBasicBlock::iterator(MI),
19767MBB->end());
19768// Update machine-CFG edges by transferring all successors of the current
19769// block to the new block which will contain the Phi nodes for the selects.
19770 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
19771// Set the successors for MBB.
19772MBB->addSuccessor(CvtMBB);
19773MBB->addSuccessor(DoneMBB);
19774
19775Register DstReg =MI.getOperand(0).getReg();
19776Register SrcReg =MI.getOperand(1).getReg();
19777Register MaxReg =MI.getOperand(2).getReg();
19778 int64_t FRM =MI.getOperand(3).getImm();
19779
19780constRISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19781MachineRegisterInfo &MRI =MBB->getParent()->getRegInfo();
19782
19783Register FabsReg =MRI.createVirtualRegister(RC);
19784BuildMI(MBB,DL,TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19785
19786// Compare the FP value to the max value.
19787Register CmpReg =MRI.createVirtualRegister(&RISCV::GPRRegClass);
19788auto MIB =
19789BuildMI(MBB,DL,TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19790if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19791 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19792
19793// Insert branch.
19794BuildMI(MBB,DL,TII.get(RISCV::BEQ))
19795 .addReg(CmpReg)
19796 .addReg(RISCV::X0)
19797 .addMBB(DoneMBB);
19798
19799 CvtMBB->addSuccessor(DoneMBB);
19800
19801// Convert to integer.
19802Register F2IReg =MRI.createVirtualRegister(&RISCV::GPRRegClass);
19803 MIB =BuildMI(CvtMBB,DL,TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19804if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19805 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19806
19807// Convert back to FP.
19808Register I2FReg =MRI.createVirtualRegister(RC);
19809 MIB =BuildMI(CvtMBB,DL,TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19810if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19811 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19812
19813// Restore the sign bit.
19814Register CvtReg =MRI.createVirtualRegister(RC);
19815BuildMI(CvtMBB,DL,TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19816
19817// Merge the results.
19818BuildMI(*DoneMBB, DoneMBB->begin(),DL,TII.get(RISCV::PHI), DstReg)
19819 .addReg(SrcReg)
19820 .addMBB(MBB)
19821 .addReg(CvtReg)
19822 .addMBB(CvtMBB);
19823
19824MI.eraseFromParent();
19825return DoneMBB;
19826}
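
// PseudoFROUND_S, for example, becomes a compare-and-convert diamond roughly
// like this (illustrative only; register names are assumed, and the max
// operand holds the magnitude above which a value is already integral):
// ```
// fsgnjx.s ft0, fa0, fa0 # |src|
// flt.s t0, ft0, fa1 # |src| < max ?
// beq t0, x0, Done # not less (or NaN): the input is already the result
// Cvt:
// fcvt.w.s t1, fa0, rm # to integer with the requested rounding mode
// fcvt.s.w ft1, t1, rm # back to FP
// fsgnj.s ft2, ft1, fa0 # restore the original sign (covers -0.0)
// Done:
// # PHI of fa0 (from the head block) and ft2 (from Cvt)
// ```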
19827
19828MachineBasicBlock *
19829RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19830MachineBasicBlock *BB) const{
19831switch (MI.getOpcode()) {
19832default:
19833llvm_unreachable("Unexpected instr type to insert");
19834case RISCV::ReadCounterWide:
19835assert(!Subtarget.is64Bit() &&
19836"ReadCounterWide is only to be used on riscv32");
19837returnemitReadCounterWidePseudo(MI, BB);
19838case RISCV::Select_GPR_Using_CC_GPR:
19839case RISCV::Select_GPR_Using_CC_Imm:
19840case RISCV::Select_FPR16_Using_CC_GPR:
19841case RISCV::Select_FPR16INX_Using_CC_GPR:
19842case RISCV::Select_FPR32_Using_CC_GPR:
19843case RISCV::Select_FPR32INX_Using_CC_GPR:
19844case RISCV::Select_FPR64_Using_CC_GPR:
19845case RISCV::Select_FPR64INX_Using_CC_GPR:
19846case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19847returnemitSelectPseudo(MI, BB, Subtarget);
19848case RISCV::BuildPairF64Pseudo:
19849returnemitBuildPairF64Pseudo(MI, BB, Subtarget);
19850case RISCV::SplitF64Pseudo:
19851returnemitSplitF64Pseudo(MI, BB, Subtarget);
19852case RISCV::PseudoQuietFLE_H:
19853returnemitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19854case RISCV::PseudoQuietFLE_H_INX:
19855returnemitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19856case RISCV::PseudoQuietFLT_H:
19857returnemitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19858case RISCV::PseudoQuietFLT_H_INX:
19859returnemitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19860case RISCV::PseudoQuietFLE_S:
19861returnemitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19862case RISCV::PseudoQuietFLE_S_INX:
19863returnemitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19864case RISCV::PseudoQuietFLT_S:
19865returnemitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19866case RISCV::PseudoQuietFLT_S_INX:
19867returnemitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19868case RISCV::PseudoQuietFLE_D:
19869returnemitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19870case RISCV::PseudoQuietFLE_D_INX:
19871returnemitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19872case RISCV::PseudoQuietFLE_D_IN32X:
19873returnemitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19874 Subtarget);
19875case RISCV::PseudoQuietFLT_D:
19876returnemitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19877case RISCV::PseudoQuietFLT_D_INX:
19878returnemitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19879case RISCV::PseudoQuietFLT_D_IN32X:
19880returnemitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19881 Subtarget);
19882
19883case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19884returnemitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19885case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19886returnemitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19887case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19888returnemitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19889case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19890returnemitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19891case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19892returnemitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19893case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19894returnemitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19895case RISCV::PseudoFROUND_H:
19896case RISCV::PseudoFROUND_H_INX:
19897case RISCV::PseudoFROUND_S:
19898case RISCV::PseudoFROUND_S_INX:
19899case RISCV::PseudoFROUND_D:
19900case RISCV::PseudoFROUND_D_INX:
19901case RISCV::PseudoFROUND_D_IN32X:
19902returnemitFROUND(MI, BB, Subtarget);
19903case RISCV::PROBED_STACKALLOC_DYN:
19904returnemitDynamicProbedAlloc(MI, BB);
19905case TargetOpcode::STATEPOINT:
19906// STATEPOINT is a pseudo instruction which has no implicit defs/uses
19907// while the jal call instruction (to which the statepoint is lowered at the
19908// end) has an implicit def. This def is early-clobber as it is written at
19909// the moment of the call, before any use is read.
19910// Add this implicit dead def here as a workaround.
19911MI.addOperand(*MI.getMF(),
19912MachineOperand::CreateReg(
19913 RISCV::X1,/*isDef*/true,
19914/*isImp*/true,/*isKill*/false,/*isDead*/true,
19915/*isUndef*/false,/*isEarlyClobber*/true));
19916 [[fallthrough]];
19917case TargetOpcode::STACKMAP:
19918case TargetOpcode::PATCHPOINT:
19919if (!Subtarget.is64Bit())
19920report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19921"supported on 64-bit targets");
19922returnemitPatchPoint(MI, BB);
19923 }
19924}
19925
19926voidRISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19927SDNode *Node) const{
19928// Add FRM dependency to any instructions with dynamic rounding mode.
19929intIdx =RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19930if (Idx < 0) {
19931// Vector pseudos have FRM index indicated by TSFlags.
19932Idx =RISCVII::getFRMOpNum(MI.getDesc());
19933if (Idx < 0)
19934return;
19935 }
19936if (MI.getOperand(Idx).getImm() !=RISCVFPRndMode::DYN)
19937return;
19938// If the instruction already reads FRM, don't add another read.
19939if (MI.readsRegister(RISCV::FRM,/*TRI=*/nullptr))
19940return;
19941MI.addOperand(
19942MachineOperand::CreateReg(RISCV::FRM,/*isDef*/false,/*isImp*/true));
19943}
19944
19945void RISCVTargetLowering::analyzeInputArgs(
19946MachineFunction &MF,CCState &CCInfo,
19947constSmallVectorImpl<ISD::InputArg> &Ins,bool IsRet,
19948RISCVCCAssignFn Fn) const{
19949unsigned NumArgs = Ins.size();
19950FunctionType *FType = MF.getFunction().getFunctionType();
19951
19952for (unsigned i = 0; i != NumArgs; ++i) {
19953MVT ArgVT = Ins[i].VT;
19954ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19955
19956Type *ArgTy =nullptr;
19957if (IsRet)
19958 ArgTy = FType->getReturnType();
19959elseif (Ins[i].isOrigArg())
19960 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19961
19962if (Fn(i, ArgVT, ArgVT,CCValAssign::Full, ArgFlags, CCInfo,
19963/*IsFixed=*/true, IsRet, ArgTy)) {
19964LLVM_DEBUG(dbgs() <<"InputArg #" << i <<" has unhandled type "
19965 << ArgVT <<'\n');
19966llvm_unreachable(nullptr);
19967 }
19968 }
19969}
19970
19971void RISCVTargetLowering::analyzeOutputArgs(
19972MachineFunction &MF,CCState &CCInfo,
19973constSmallVectorImpl<ISD::OutputArg> &Outs,bool IsRet,
19974 CallLoweringInfo *CLI,RISCVCCAssignFn Fn) const{
19975unsigned NumArgs = Outs.size();
19976
19977for (unsigned i = 0; i != NumArgs; i++) {
19978MVT ArgVT = Outs[i].VT;
19979ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19980Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty :nullptr;
19981
19982if (Fn(i, ArgVT, ArgVT,CCValAssign::Full, ArgFlags, CCInfo,
19983 Outs[i].IsFixed, IsRet, OrigTy)) {
19984LLVM_DEBUG(dbgs() <<"OutputArg #" << i <<" has unhandled type "
19985 << ArgVT <<"\n");
19986llvm_unreachable(nullptr);
19987 }
19988 }
19989}
19990
19991// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19992// values.
19993staticSDValueconvertLocVTToValVT(SelectionDAG &DAG,SDValue Val,
19994constCCValAssign &VA,constSDLoc &DL,
19995constRISCVSubtarget &Subtarget) {
19996if (VA.needsCustom()) {
19997if (VA.getLocVT().isInteger() &&
19998 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19999return DAG.getNode(RISCVISD::FMV_H_X,DL, VA.getValVT(), Val);
20000if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
20001return DAG.getNode(RISCVISD::FMV_W_X_RV64,DL, MVT::f32, Val);
20002if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
20003returnconvertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
20004llvm_unreachable("Unexpected Custom handling.");
20005 }
20006
20007switch (VA.getLocInfo()) {
20008default:
20009llvm_unreachable("Unexpected CCValAssign::LocInfo");
20010caseCCValAssign::Full:
20011break;
20012caseCCValAssign::BCvt:
20013 Val = DAG.getNode(ISD::BITCAST,DL, VA.getValVT(), Val);
20014break;
20015 }
20016return Val;
20017}
20018
20019// The caller is responsible for loading the full value if the argument is
20020// passed with CCValAssign::Indirect.
20021staticSDValueunpackFromRegLoc(SelectionDAG &DAG,SDValue Chain,
20022constCCValAssign &VA,constSDLoc &DL,
20023constISD::InputArg &In,
20024constRISCVTargetLowering &TLI) {
20025MachineFunction &MF = DAG.getMachineFunction();
20026MachineRegisterInfo &RegInfo = MF.getRegInfo();
20027EVT LocVT = VA.getLocVT();
20028SDValue Val;
20029constTargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
20030Register VReg =RegInfo.createVirtualRegister(RC);
20031RegInfo.addLiveIn(VA.getLocReg(), VReg);
20032 Val = DAG.getCopyFromReg(Chain,DL, VReg, LocVT);
20033
20034// If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
20035if (In.isOrigArg()) {
20036Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
20037if (OrigArg->getType()->isIntegerTy()) {
20038unsignedBitWidth = OrigArg->getType()->getIntegerBitWidth();
20039// An input zero extended from i31 can also be considered sign extended.
20040if ((BitWidth <= 32 && In.Flags.isSExt()) ||
20041 (BitWidth < 32 && In.Flags.isZExt())) {
20042RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
20043 RVFI->addSExt32Register(VReg);
20044 }
20045 }
20046 }
20047
20048if (VA.getLocInfo() ==CCValAssign::Indirect)
20049return Val;
20050
20051returnconvertLocVTToValVT(DAG, Val, VA,DL, TLI.getSubtarget());
20052}
20053
20054staticSDValueconvertValVTToLocVT(SelectionDAG &DAG,SDValue Val,
20055constCCValAssign &VA,constSDLoc &DL,
20056constRISCVSubtarget &Subtarget) {
20057EVT LocVT = VA.getLocVT();
20058
20059if (VA.needsCustom()) {
20060if (LocVT.isInteger() &&
20061 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20062return DAG.getNode(RISCVISD::FMV_X_ANYEXTH,DL, LocVT, Val);
20063if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
20064return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64,DL, MVT::i64, Val);
20065if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
20066returnconvertToScalableVector(LocVT, Val, DAG, Subtarget);
20067llvm_unreachable("Unexpected Custom handling.");
20068 }
20069
20070switch (VA.getLocInfo()) {
20071default:
20072llvm_unreachable("Unexpected CCValAssign::LocInfo");
20073caseCCValAssign::Full:
20074break;
20075caseCCValAssign::BCvt:
20076 Val = DAG.getNode(ISD::BITCAST,DL, LocVT, Val);
20077break;
20078 }
20079return Val;
20080}
20081
20082// The caller is responsible for loading the full value if the argument is
20083// passed with CCValAssign::Indirect.
20084staticSDValueunpackFromMemLoc(SelectionDAG &DAG,SDValue Chain,
20085constCCValAssign &VA,constSDLoc &DL) {
20086MachineFunction &MF = DAG.getMachineFunction();
20087MachineFrameInfo &MFI = MF.getFrameInfo();
20088EVT LocVT = VA.getLocVT();
20089EVT ValVT = VA.getValVT();
20090EVT PtrVT =MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
20091if (VA.getLocInfo() ==CCValAssign::Indirect) {
20092// When the value is a scalable vector, the stack slot holds a pointer to the
20093// actual value rather than the value itself, so the type loaded here (ValVT)
20094// becomes the pointer-sized LocVT instead of the scalable vector type.
20095 ValVT = LocVT;
20096 }
20097int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20098/*IsImmutable=*/true);
20099SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20100SDValue Val;
20101
20102ISD::LoadExtType ExtType =ISD::NON_EXTLOAD;
20103switch (VA.getLocInfo()) {
20104default:
20105llvm_unreachable("Unexpected CCValAssign::LocInfo");
20106caseCCValAssign::Full:
20107caseCCValAssign::Indirect:
20108caseCCValAssign::BCvt:
20109break;
20110 }
20111 Val = DAG.getExtLoad(
20112 ExtType,DL, LocVT, Chain, FIN,
20113MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
20114return Val;
20115}
20116
20117staticSDValueunpackF64OnRV32DSoftABI(SelectionDAG &DAG,SDValue Chain,
20118constCCValAssign &VA,
20119constCCValAssign &HiVA,
20120constSDLoc &DL) {
20121assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20122"Unexpected VA");
20123MachineFunction &MF = DAG.getMachineFunction();
20124MachineFrameInfo &MFI = MF.getFrameInfo();
20125MachineRegisterInfo &RegInfo = MF.getRegInfo();
20126
20127assert(VA.isRegLoc() &&"Expected register VA assignment");
20128
20129Register LoVReg =RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20130RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20131SDValueLo = DAG.getCopyFromReg(Chain,DL, LoVReg, MVT::i32);
20132SDValueHi;
20133if (HiVA.isMemLoc()) {
20134// Second half of f64 is passed on the stack.
20135int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20136/*IsImmutable=*/true);
20137SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20138Hi = DAG.getLoad(MVT::i32,DL, Chain, FIN,
20139MachinePointerInfo::getFixedStack(MF, FI));
20140 }else {
20141// Second half of f64 is passed in another GPR.
20142Register HiVReg =RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20143RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20144Hi = DAG.getCopyFromReg(Chain,DL, HiVReg, MVT::i32);
20145 }
20146return DAG.getNode(RISCVISD::BuildPairF64,DL, MVT::f64,Lo,Hi);
20147}
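
// Worked example (illustrative only; the split is assumed): with the ilp32
// soft-float ABI a call such as `void g(int x, double d)` passes x in a0 and
// the f64 as two i32 halves, e.g. the low word in a1 and the high word in a2.
// This helper copies the low word from its register, takes the high word from
// the next register or, if it spilled, from its fixed stack object, and
// reassembles the f64 with RISCVISD::BuildPairF64.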
20148
20149// Transform physical registers into virtual registers.
20150SDValueRISCVTargetLowering::LowerFormalArguments(
20151SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,
20152constSmallVectorImpl<ISD::InputArg> &Ins,constSDLoc &DL,
20153SelectionDAG &DAG,SmallVectorImpl<SDValue> &InVals) const{
20154
20155MachineFunction &MF = DAG.getMachineFunction();
20156
20157switch (CallConv) {
20158default:
20159report_fatal_error("Unsupported calling convention");
20160caseCallingConv::C:
20161caseCallingConv::Fast:
20162caseCallingConv::SPIR_KERNEL:
20163caseCallingConv::GRAAL:
20164caseCallingConv::RISCV_VectorCall:
20165break;
20166caseCallingConv::GHC:
20167if (Subtarget.hasStdExtE())
20168report_fatal_error("GHC calling convention is not supported on RVE!");
20169if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20170report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20171"(Zdinx/D) instruction set extensions");
20172 }
20173
20174constFunction &Func = MF.getFunction();
20175if (Func.hasFnAttribute("interrupt")) {
20176if (!Func.arg_empty())
20177report_fatal_error(
20178"Functions with the interrupt attribute cannot have arguments!");
20179
20180StringRef Kind =
20181 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20182
20183if (!(Kind =="user" || Kind =="supervisor" || Kind =="machine"))
20184report_fatal_error(
20185"Function interrupt attribute argument not supported!");
20186 }
20187
20188EVT PtrVT =getPointerTy(DAG.getDataLayout());
20189MVT XLenVT = Subtarget.getXLenVT();
20190unsigned XLenInBytes = Subtarget.getXLen() / 8;
20191// Used with varargs to accumulate store chains.
20192 std::vector<SDValue> OutChains;
20193
20194// Assign locations to all of the incoming arguments.
20195SmallVector<CCValAssign, 16> ArgLocs;
20196CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20197
20198if (CallConv ==CallingConv::GHC)
20199 CCInfo.AnalyzeFormalArguments(Ins,CC_RISCV_GHC);
20200else
20201 analyzeInputArgs(MF, CCInfo, Ins,/*IsRet=*/false,
20202 CallConv ==CallingConv::Fast ?CC_RISCV_FastCC
20203 :CC_RISCV);
20204
20205for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20206CCValAssign &VA = ArgLocs[i];
20207SDValue ArgValue;
20208// Passing f64 on RV32D with a soft float ABI must be handled as a special
20209// case.
20210if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20211assert(VA.needsCustom());
20212 ArgValue =unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i],DL);
20213 }elseif (VA.isRegLoc())
20214 ArgValue =unpackFromRegLoc(DAG, Chain, VA,DL, Ins[InsIdx], *this);
20215else
20216 ArgValue =unpackFromMemLoc(DAG, Chain, VA,DL);
20217
20218if (VA.getLocInfo() ==CCValAssign::Indirect) {
20219// If the original argument was split and passed by reference (e.g. i128
20220// on RV32), we need to load all parts of it here (using the same
20221// address). Vectors may be partly split to registers and partly to the
20222// stack, in which case the base address is partly offset and subsequent
20223// loads are relative to that.
20224 InVals.push_back(DAG.getLoad(VA.getValVT(),DL, Chain, ArgValue,
20225MachinePointerInfo()));
20226unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20227unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20228assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20229while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20230CCValAssign &PartVA = ArgLocs[i + 1];
20231unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20232SDValueOffset = DAG.getIntPtrConstant(PartOffset,DL);
20233if (PartVA.getValVT().isScalableVector())
20234Offset = DAG.getNode(ISD::VSCALE,DL, XLenVT,Offset);
20235SDValueAddress = DAG.getNode(ISD::ADD,DL, PtrVT, ArgValue,Offset);
20236 InVals.push_back(DAG.getLoad(PartVA.getValVT(),DL, Chain,Address,
20237MachinePointerInfo()));
20238 ++i;
20239 ++InsIdx;
20240 }
20241continue;
20242 }
20243 InVals.push_back(ArgValue);
20244 }
20245
20246if (any_of(ArgLocs,
20247 [](CCValAssign &VA) {return VA.getLocVT().isScalableVector(); }))
20248 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20249
20250if (IsVarArg) {
20251ArrayRef<MCPhysReg> ArgRegs =RISCV::getArgGPRs(Subtarget.getTargetABI());
20252unsignedIdx = CCInfo.getFirstUnallocated(ArgRegs);
20253constTargetRegisterClass *RC = &RISCV::GPRRegClass;
20254MachineFrameInfo &MFI = MF.getFrameInfo();
20255MachineRegisterInfo &RegInfo = MF.getRegInfo();
20256RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
20257
20258// Size of the vararg save area. For now, the varargs save area is either
20259// zero or large enough to hold a0-a7.
20260int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() -Idx);
20261int FI;
20262
20263// If all registers are allocated, then all varargs must be passed on the
20264// stack and we don't need to save any argregs.
20265if (VarArgsSaveSize == 0) {
20266int VaArgOffset = CCInfo.getStackSize();
20267 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset,true);
20268 }else {
20269int VaArgOffset = -VarArgsSaveSize;
20270 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset,true);
20271
20272// If saving an odd number of registers, create an extra stack slot to
20273// ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20274// offsets to even-numbered registers remain 2*XLEN-aligned.
20275if (Idx % 2) {
20276 MFI.CreateFixedObject(
20277 XLenInBytes, VaArgOffset -static_cast<int>(XLenInBytes),true);
20278 VarArgsSaveSize += XLenInBytes;
20279 }
20280
20281SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20282
20283// Copy the integer registers that may have been used for passing varargs
20284// to the vararg save area.
20285for (unsignedI =Idx;I < ArgRegs.size(); ++I) {
20286constRegister Reg = RegInfo.createVirtualRegister(RC);
20287 RegInfo.addLiveIn(ArgRegs[I], Reg);
20288SDValue ArgValue = DAG.getCopyFromReg(Chain,DL, Reg, XLenVT);
20289SDValue Store = DAG.getStore(
20290 Chain,DL, ArgValue, FIN,
20291MachinePointerInfo::getFixedStack(MF, FI, (I -Idx) * XLenInBytes));
20292 OutChains.push_back(Store);
20293 FIN =
20294 DAG.getMemBasePlusOffset(FIN,TypeSize::getFixed(XLenInBytes),DL);
20295 }
20296 }
20297
20298// Record the frame index of the first variable argument,
20299// which is needed by the VASTART lowering.
20300 RVFI->setVarArgsFrameIndex(FI);
20301 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20302 }
20303
20304// All stores are grouped in one node to allow the matching between
20305// the size of Ins and InVals. This only happens for vararg functions.
20306if (!OutChains.empty()) {
20307 OutChains.push_back(Chain);
20308 Chain = DAG.getNode(ISD::TokenFactor,DL, MVT::Other, OutChains);
20309 }
20310
20311return Chain;
20312}
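
// Worked example for the vararg save area above (illustrative only): for an
// RV32 ilp32 function `int f(int a, int b, int c, ...)`, a0-a2 carry the named
// arguments, so Idx == 3 and the save area initially covers a3-a7
// (4 * (8 - 3) == 20 bytes). Since Idx is odd, one extra 4-byte slot is added
// below it (growing VarArgsSaveSize to 24) so even-numbered registers keep
// 2*XLEN-aligned offsets, and the recorded frame index points at the slot that
// receives a3, which is where va_start begins.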
20313
20314/// isEligibleForTailCallOptimization - Check whether the call is eligible
20315/// for tail call optimization.
20316/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20317bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20318CCState &CCInfo, CallLoweringInfo &CLI,MachineFunction &MF,
20319constSmallVector<CCValAssign, 16> &ArgLocs) const{
20320
20321auto CalleeCC = CLI.CallConv;
20322auto &Outs = CLI.Outs;
20323auto &Caller = MF.getFunction();
20324auto CallerCC = Caller.getCallingConv();
20325
20326// Exception-handling functions need a special set of instructions to
20327// indicate a return to the hardware. Tail-calling another function would
20328// probably break this.
20329// TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20330// should be expanded as new function attributes are introduced.
20331if (Caller.hasFnAttribute("interrupt"))
20332returnfalse;
20333
20334// Do not tail call opt if the stack is used to pass parameters.
20335if (CCInfo.getStackSize() != 0)
20336returnfalse;
20337
20338// Do not tail call opt if any parameters need to be passed indirectly.
20339// Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20340// passed indirectly. So the address of the value will be passed in a
20341// register, or if not available, then the address is put on the stack. In
20342// order to pass indirectly, space on the stack often needs to be allocated
20343// in order to store the value. In this case the CCInfo.getStackSize()
20344// != 0 check is not enough and we need to check if any CCValAssign in
20345// ArgLocs is passed CCValAssign::Indirect.
20346for (auto &VA : ArgLocs)
20347if (VA.getLocInfo() ==CCValAssign::Indirect)
20348returnfalse;
20349
20350// Do not tail call opt if either caller or callee uses struct return
20351// semantics.
20352auto IsCallerStructRet = Caller.hasStructRetAttr();
20353auto IsCalleeStructRet = Outs.empty() ?false : Outs[0].Flags.isSRet();
20354if (IsCallerStructRet || IsCalleeStructRet)
20355returnfalse;
20356
20357// The callee has to preserve all registers the caller needs to preserve.
20358constRISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20359constuint32_t *CallerPreserved =TRI->getCallPreservedMask(MF, CallerCC);
20360if (CalleeCC != CallerCC) {
20361constuint32_t *CalleePreserved =TRI->getCallPreservedMask(MF, CalleeCC);
20362if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20363returnfalse;
20364 }
20365
20366// Byval parameters hand the function a pointer directly into the stack area
20367// we want to reuse during a tail call. Working around this *is* possible
20368// but less efficient and uglier in LowerCall.
20369for (auto &Arg : Outs)
20370if (Arg.Flags.isByVal())
20371returnfalse;
20372
20373returntrue;
20374}
20375
20376static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20377  return DAG.getDataLayout().getPrefTypeAlign(
20378      VT.getTypeForEVT(*DAG.getContext()));
20379}
20380
20381// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20382// and output parameter nodes.
20383SDValueRISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20384SmallVectorImpl<SDValue> &InVals) const{
20385SelectionDAG &DAG = CLI.DAG;
20386SDLoc &DL = CLI.DL;
20387SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
20388SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20389SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
20390SDValue Chain = CLI.Chain;
20391SDValue Callee = CLI.Callee;
20392bool &IsTailCall = CLI.IsTailCall;
20393CallingConv::ID CallConv = CLI.CallConv;
20394bool IsVarArg = CLI.IsVarArg;
20395EVT PtrVT =getPointerTy(DAG.getDataLayout());
20396MVT XLenVT = Subtarget.getXLenVT();
20397
20398MachineFunction &MF = DAG.getMachineFunction();
20399
20400// Analyze the operands of the call, assigning locations to each operand.
20401SmallVector<CCValAssign, 16> ArgLocs;
20402CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20403
20404if (CallConv ==CallingConv::GHC) {
20405if (Subtarget.hasStdExtE())
20406report_fatal_error("GHC calling convention is not supported on RVE!");
20407 ArgCCInfo.AnalyzeCallOperands(Outs,CC_RISCV_GHC);
20408 }else
20409 analyzeOutputArgs(MF, ArgCCInfo, Outs,/*IsRet=*/false, &CLI,
20410 CallConv ==CallingConv::Fast ?CC_RISCV_FastCC
20411 :CC_RISCV);
20412
20413// Check if it's really possible to do a tail call.
20414if (IsTailCall)
20415 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20416
20417if (IsTailCall)
20418 ++NumTailCalls;
20419elseif (CLI.CB && CLI.CB->isMustTailCall())
20420report_fatal_error("failed to perform tail call elimination on a call "
20421"site marked musttail");
20422
20423// Get a count of how many bytes are to be pushed on the stack.
20424unsigned NumBytes = ArgCCInfo.getStackSize();
20425
20426// Create local copies for byval args
20427SmallVector<SDValue, 8> ByValArgs;
20428for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20429ISD::ArgFlagsTy Flags = Outs[i].Flags;
20430if (!Flags.isByVal())
20431continue;
20432
20433SDValue Arg = OutVals[i];
20434unsignedSize = Flags.getByValSize();
20435Align Alignment = Flags.getNonZeroByValAlign();
20436
20437int FI =
20438 MF.getFrameInfo().CreateStackObject(Size, Alignment,/*isSS=*/false);
20439SDValue FIPtr = DAG.getFrameIndex(FI,getPointerTy(DAG.getDataLayout()));
20440SDValue SizeNode = DAG.getConstant(Size,DL, XLenVT);
20441
20442 Chain = DAG.getMemcpy(Chain,DL, FIPtr, Arg, SizeNode, Alignment,
20443/*IsVolatile=*/false,
20444/*AlwaysInline=*/false,/*CI*/nullptr, IsTailCall,
20445MachinePointerInfo(),MachinePointerInfo());
20446 ByValArgs.push_back(FIPtr);
20447 }
20448
20449if (!IsTailCall)
20450 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20451
20452// Copy argument values to their designated locations.
20453SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20454SmallVector<SDValue, 8> MemOpChains;
20455SDValue StackPtr;
20456for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20457 ++i, ++OutIdx) {
20458CCValAssign &VA = ArgLocs[i];
20459SDValue ArgValue = OutVals[OutIdx];
20460ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20461
20462// Handle passing f64 on RV32D with a soft float ABI as a special case.
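// Illustrative sketch (assumed register assignment, not from the original
// source): with the ilp32 ABI on RV32D, an f64 argument is split by
// RISCVISD::SplitF64 into two i32 halves; the low half goes in the next free
// GPR (e.g. a0) and the high half in the following GPR (e.g. a1), or on the
// stack if no GPR remains, as handled below.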
20463if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20464assert(VA.isRegLoc() &&"Expected register VA assignment");
20465assert(VA.needsCustom());
20466SDValue SplitF64 = DAG.getNode(
20467RISCVISD::SplitF64,DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20468SDValueLo = SplitF64.getValue(0);
20469SDValueHi = SplitF64.getValue(1);
20470
20471Register RegLo = VA.getLocReg();
20472 RegsToPass.push_back(std::make_pair(RegLo,Lo));
20473
20474// Get the CCValAssign for the Hi part.
20475CCValAssign &HiVA = ArgLocs[++i];
20476
20477if (HiVA.isMemLoc()) {
20478// Second half of f64 is passed on the stack.
20479if (!StackPtr.getNode())
20480 StackPtr = DAG.getCopyFromReg(Chain,DL, RISCV::X2, PtrVT);
20481SDValueAddress =
20482 DAG.getNode(ISD::ADD,DL, PtrVT, StackPtr,
20483 DAG.getIntPtrConstant(HiVA.getLocMemOffset(),DL));
20484// Emit the store.
20485 MemOpChains.push_back(DAG.getStore(
20486 Chain,DL,Hi,Address,
20487MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
20488 }else {
20489// Second half of f64 is passed in another GPR.
20490Register RegHigh = HiVA.getLocReg();
20491 RegsToPass.push_back(std::make_pair(RegHigh,Hi));
20492 }
20493continue;
20494 }
20495
20496// Promote the value if needed.
20497// For now, only handle fully promoted and indirect arguments.
20498if (VA.getLocInfo() ==CCValAssign::Indirect) {
20499// Store the argument in a stack slot and pass its address.
20500Align StackAlign =
20501 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20502getPrefTypeAlign(ArgValue.getValueType(), DAG));
20503TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20504// If the original argument was split (e.g. i128), we need
20505// to store the required parts of it here (and pass just one address).
20506// Vectors may be partly split to registers and partly to the stack, in
20507// which case the base address is partly offset and subsequent stores are
20508// relative to that.
20509unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20510unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20511assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20512// Calculate the total size to store. We can only determine this by
20513// walking the remaining parts of the split argument and accumulating
20514// their sizes.
20515SmallVector<std::pair<SDValue, SDValue>> Parts;
20516while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20517SDValue PartValue = OutVals[OutIdx + 1];
20518unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20519SDValueOffset = DAG.getIntPtrConstant(PartOffset,DL);
20520EVT PartVT = PartValue.getValueType();
20521if (PartVT.isScalableVector())
20522Offset = DAG.getNode(ISD::VSCALE,DL, XLenVT,Offset);
20523 StoredSize += PartVT.getStoreSize();
20524 StackAlign = std::max(StackAlign,getPrefTypeAlign(PartVT, DAG));
20525 Parts.push_back(std::make_pair(PartValue,Offset));
20526 ++i;
20527 ++OutIdx;
20528 }
20529SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20530int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20531 MemOpChains.push_back(
20532 DAG.getStore(Chain,DL, ArgValue, SpillSlot,
20533MachinePointerInfo::getFixedStack(MF, FI)));
20534for (constauto &Part : Parts) {
20535SDValue PartValue = Part.first;
20536SDValue PartOffset = Part.second;
20537SDValueAddress =
20538 DAG.getNode(ISD::ADD,DL, PtrVT, SpillSlot, PartOffset);
20539 MemOpChains.push_back(
20540 DAG.getStore(Chain,DL, PartValue,Address,
20541MachinePointerInfo::getFixedStack(MF, FI)));
20542 }
20543 ArgValue = SpillSlot;
20544 }else {
20545 ArgValue =convertValVTToLocVT(DAG, ArgValue, VA,DL, Subtarget);
20546 }
20547
20548// Use local copy if it is a byval arg.
20549if (Flags.isByVal())
20550 ArgValue = ByValArgs[j++];
20551
20552if (VA.isRegLoc()) {
20553// Queue up the argument copies and emit them at the end.
20554 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20555 }else {
20556assert(VA.isMemLoc() &&"Argument not register or memory");
20557assert(!IsTailCall &&"Tail call not allowed if stack is used "
20558"for passing parameters");
20559
20560// Work out the address of the stack slot.
20561if (!StackPtr.getNode())
20562 StackPtr = DAG.getCopyFromReg(Chain,DL, RISCV::X2, PtrVT);
20563SDValueAddress =
20564 DAG.getNode(ISD::ADD,DL, PtrVT, StackPtr,
20565 DAG.getIntPtrConstant(VA.getLocMemOffset(),DL));
20566
20567// Emit the store.
20568 MemOpChains.push_back(
20569 DAG.getStore(Chain,DL, ArgValue,Address,
20570MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
20571 }
20572 }
20573
20574// Join the stores, which are independent of one another.
20575if (!MemOpChains.empty())
20576 Chain = DAG.getNode(ISD::TokenFactor,DL, MVT::Other, MemOpChains);
20577
20578SDValue Glue;
20579
20580// Build a sequence of copy-to-reg nodes, chained and glued together.
20581for (auto &Reg : RegsToPass) {
20582 Chain = DAG.getCopyToReg(Chain,DL, Reg.first, Reg.second, Glue);
20583 Glue = Chain.getValue(1);
20584 }
20585
20586// Validate that none of the argument registers have been marked as
20587// reserved; if so, report an error. Do the same for the return address if
20588// this is not a tail call.
20589 validateCCReservedRegs(RegsToPass, MF);
20590if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20591 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20592 MF.getFunction(),
20593"Return address register required, but has been reserved."});
20594
20595// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20596// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20597// split it, allowing the direct call to be matched by PseudoCALL.
20598bool CalleeIsLargeExternalSymbol =false;
20599if (getTargetMachine().getCodeModel() ==CodeModel::Large) {
20600if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20601 Callee =getLargeGlobalAddress(S,DL, PtrVT, DAG);
20602elseif (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20603 Callee =getLargeExternalSymbol(S,DL, PtrVT, DAG);
20604 CalleeIsLargeExternalSymbol =true;
20605 }
20606 }elseif (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20607constGlobalValue *GV = S->getGlobal();
20608 Callee = DAG.getTargetGlobalAddress(GV,DL, PtrVT, 0,RISCVII::MO_CALL);
20609 }elseif (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20610 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT,RISCVII::MO_CALL);
20611 }
20612
20613// The first call operand is the chain and the second is the target address.
20614SmallVector<SDValue, 8> Ops;
20615 Ops.push_back(Chain);
20616 Ops.push_back(Callee);
20617
20618// Add argument registers to the end of the list so that they are
20619// known live into the call.
20620for (auto &Reg : RegsToPass)
20621 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20622
20623// Add a register mask operand representing the call-preserved registers.
20624constTargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20625constuint32_t *Mask =TRI->getCallPreservedMask(MF, CallConv);
20626assert(Mask &&"Missing call preserved mask for calling convention");
20627 Ops.push_back(DAG.getRegisterMask(Mask));
20628
20629// Glue the call to the argument copies, if any.
20630if (Glue.getNode())
20631 Ops.push_back(Glue);
20632
20633assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20634"Unexpected CFI type for a direct call");
20635
20636// Emit the call.
20637SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20638
20639// Use a software-guarded branch for non-indirect calls under the large code
20640// model. A tail call to an external symbol has a null CLI.CB, so we need
20641// another way to determine the call-site type.
20642bool NeedSWGuarded =false;
20643if (getTargetMachine().getCodeModel() ==CodeModel::Large &&
20644 Subtarget.hasStdExtZicfilp() &&
20645 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20646 NeedSWGuarded =true;
20647
20648if (IsTailCall) {
20649 MF.getFrameInfo().setHasTailCall();
20650unsigned CallOpc =
20651 NeedSWGuarded ?RISCVISD::SW_GUARDED_TAIL :RISCVISD::TAIL;
20652SDValue Ret = DAG.getNode(CallOpc,DL, NodeTys, Ops);
20653if (CLI.CFIType)
20654 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20655 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20656return Ret;
20657 }
20658
20659unsigned CallOpc = NeedSWGuarded ?RISCVISD::SW_GUARDED_CALL :RISCVISD::CALL;
20660 Chain = DAG.getNode(CallOpc,DL, NodeTys, Ops);
20661if (CLI.CFIType)
20662 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20663 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20664 Glue = Chain.getValue(1);
20665
20666// Mark the end of the call, which is glued to the call itself.
20667 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue,DL);
20668 Glue = Chain.getValue(1);
20669
20670// Assign locations to each value returned by this call.
20671SmallVector<CCValAssign, 16> RVLocs;
20672CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20673 analyzeInputArgs(MF, RetCCInfo, Ins,/*IsRet=*/true,CC_RISCV);
20674
20675// Copy all of the result registers out of their specified physreg.
20676for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20677auto &VA = RVLocs[i];
20678// Copy the value out
20679SDValue RetValue =
20680 DAG.getCopyFromReg(Chain,DL, VA.getLocReg(), VA.getLocVT(), Glue);
20681// Glue the RetValue to the end of the call sequence
20682 Chain = RetValue.getValue(1);
20683 Glue = RetValue.getValue(2);
20684
20685if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20686assert(VA.needsCustom());
20687SDValue RetValue2 = DAG.getCopyFromReg(Chain,DL, RVLocs[++i].getLocReg(),
20688 MVT::i32, Glue);
20689 Chain = RetValue2.getValue(1);
20690 Glue = RetValue2.getValue(2);
20691 RetValue = DAG.getNode(RISCVISD::BuildPairF64,DL, MVT::f64, RetValue,
20692 RetValue2);
20693 }else
20694 RetValue =convertLocVTToValVT(DAG, RetValue, VA,DL, Subtarget);
20695
20696 InVals.push_back(RetValue);
20697 }
20698
20699return Chain;
20700}
20701
20702boolRISCVTargetLowering::CanLowerReturn(
20703CallingConv::ID CallConv,MachineFunction &MF,bool IsVarArg,
20704constSmallVectorImpl<ISD::OutputArg> &Outs,LLVMContext &Context,
20705constType *RetTy) const{
20706SmallVector<CCValAssign, 16> RVLocs;
20707CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20708
20709for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20710MVT VT = Outs[i].VT;
20711ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20712if (CC_RISCV(i, VT, VT,CCValAssign::Full, ArgFlags, CCInfo,
20713/*IsFixed=*/true,/*IsRet=*/true,nullptr))
20714returnfalse;
20715 }
20716returntrue;
20717}
20718
20719SDValue
20720RISCVTargetLowering::LowerReturn(SDValue Chain,CallingConv::ID CallConv,
20721bool IsVarArg,
20722constSmallVectorImpl<ISD::OutputArg> &Outs,
20723constSmallVectorImpl<SDValue> &OutVals,
20724constSDLoc &DL,SelectionDAG &DAG) const{
20725MachineFunction &MF = DAG.getMachineFunction();
20726constRISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20727
20728// Stores the assignment of the return value to a location.
20729SmallVector<CCValAssign, 16> RVLocs;
20730
20731// Info about the registers and stack slot.
20732CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20733 *DAG.getContext());
20734
20735 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs,/*IsRet=*/true,
20736nullptr,CC_RISCV);
20737
20738if (CallConv ==CallingConv::GHC && !RVLocs.empty())
20739report_fatal_error("GHC functions return void only");
20740
20741SDValue Glue;
20742SmallVector<SDValue, 4> RetOps(1, Chain);
20743
20744// Copy the result values into the output registers.
20745for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20746SDValue Val = OutVals[OutIdx];
20747CCValAssign &VA = RVLocs[i];
20748assert(VA.isRegLoc() &&"Can only return in registers!");
20749
20750if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20751// Handle returning f64 on RV32D with a soft float ABI.
20752assert(VA.isRegLoc() &&"Expected return via registers");
20753assert(VA.needsCustom());
20754SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64,DL,
20755 DAG.getVTList(MVT::i32, MVT::i32), Val);
20756SDValueLo = SplitF64.getValue(0);
20757SDValueHi = SplitF64.getValue(1);
20758Register RegLo = VA.getLocReg();
20759Register RegHi = RVLocs[++i].getLocReg();
20760
20761if (STI.isRegisterReservedByUser(RegLo) ||
20762 STI.isRegisterReservedByUser(RegHi))
20763 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20764 MF.getFunction(),
20765"Return value register required, but has been reserved."});
20766
20767 Chain = DAG.getCopyToReg(Chain,DL, RegLo,Lo, Glue);
20768 Glue = Chain.getValue(1);
20769 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20770 Chain = DAG.getCopyToReg(Chain,DL, RegHi,Hi, Glue);
20771 Glue = Chain.getValue(1);
20772 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20773 }else {
20774// Handle a 'normal' return.
20775 Val =convertValVTToLocVT(DAG, Val, VA,DL, Subtarget);
20776 Chain = DAG.getCopyToReg(Chain,DL, VA.getLocReg(), Val, Glue);
20777
20778if (STI.isRegisterReservedByUser(VA.getLocReg()))
20779 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20780 MF.getFunction(),
20781"Return value register required, but has been reserved."});
20782
20783// Guarantee that all emitted copies are stuck together.
20784 Glue = Chain.getValue(1);
20785 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20786 }
20787 }
20788
20789 RetOps[0] = Chain;// Update chain.
20790
20791// Add the glue node if we have it.
20792if (Glue.getNode()) {
20793 RetOps.push_back(Glue);
20794 }
20795
20796if (any_of(RVLocs,
20797 [](CCValAssign &VA) {return VA.getLocVT().isScalableVector(); }))
20798 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20799
20800unsigned RetOpc =RISCVISD::RET_GLUE;
20801// Interrupt service routines use different return instructions.
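// Illustrative sketch (hypothetical user source, not from the original): a
// handler declared roughly as
//   __attribute__((interrupt("supervisor"))) void isr(void);
// returns with sret, while any other kind (e.g. "machine") returns with
// mret, matching the Kind check below.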
20802constFunction &Func = DAG.getMachineFunction().getFunction();
20803if (Func.hasFnAttribute("interrupt")) {
20804if (!Func.getReturnType()->isVoidTy())
20805report_fatal_error(
20806"Functions with the interrupt attribute must have void return type!");
20807
20808MachineFunction &MF = DAG.getMachineFunction();
20809StringRef Kind =
20810 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20811
20812if (Kind =="supervisor")
20813 RetOpc =RISCVISD::SRET_GLUE;
20814else
20815 RetOpc =RISCVISD::MRET_GLUE;
20816 }
20817
20818return DAG.getNode(RetOpc,DL, MVT::Other, RetOps);
20819}
20820
20821void RISCVTargetLowering::validateCCReservedRegs(
20822constSmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20823MachineFunction &MF) const{
20824constFunction &F = MF.getFunction();
20825constRISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20826
20827if (llvm::any_of(Regs, [&STI](auto Reg) {
20828return STI.isRegisterReservedByUser(Reg.first);
20829 }))
20830F.getContext().diagnose(DiagnosticInfoUnsupported{
20831F,"Argument register required, but has been reserved."});
20832}
20833
20834// Check if the result of the node is only used as a return value, as
20835// otherwise we can't perform a tail-call.
20836boolRISCVTargetLowering::isUsedByReturnOnly(SDNode *N,SDValue &Chain) const{
20837if (N->getNumValues() != 1)
20838returnfalse;
20839if (!N->hasNUsesOfValue(1, 0))
20840returnfalse;
20841
20842SDNode *Copy = *N->user_begin();
20843
20844if (Copy->getOpcode() ==ISD::BITCAST) {
20845returnisUsedByReturnOnly(Copy, Chain);
20846 }
20847
20848// TODO: Handle additional opcodes in order to support tail-calling libcalls
20849// with soft float ABIs.
20850if (Copy->getOpcode() !=ISD::CopyToReg) {
20851returnfalse;
20852 }
20853
20854// If the ISD::CopyToReg has a glue operand, we conservatively assume it
20855// isn't safe to perform a tail call.
20856if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20857returnfalse;
20858
20859// The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20860bool HasRet =false;
20861for (SDNode *Node : Copy->users()) {
20862if (Node->getOpcode() !=RISCVISD::RET_GLUE)
20863returnfalse;
20864 HasRet =true;
20865 }
20866if (!HasRet)
20867returnfalse;
20868
20869 Chain = Copy->getOperand(0);
20870returntrue;
20871}
20872
20873bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20874  return CI->isTailCall();
20875}
20876
20877constchar *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const{
20878#define NODE_NAME_CASE(NODE) \
20879 case RISCVISD::NODE: \
20880 return "RISCVISD::" #NODE;
20881// clang-format off
20882switch ((RISCVISD::NodeType)Opcode) {
20883caseRISCVISD::FIRST_NUMBER:
20884break;
20885NODE_NAME_CASE(RET_GLUE)
20886NODE_NAME_CASE(SRET_GLUE)
20887NODE_NAME_CASE(MRET_GLUE)
20888NODE_NAME_CASE(CALL)
20889NODE_NAME_CASE(TAIL)
20890NODE_NAME_CASE(SELECT_CC)
20891NODE_NAME_CASE(BR_CC)
20892NODE_NAME_CASE(BuildGPRPair)
20893NODE_NAME_CASE(SplitGPRPair)
20894NODE_NAME_CASE(BuildPairF64)
20895NODE_NAME_CASE(SplitF64)
20896NODE_NAME_CASE(ADD_LO)
20897NODE_NAME_CASE(HI)
20898NODE_NAME_CASE(LLA)
20899NODE_NAME_CASE(ADD_TPREL)
20900NODE_NAME_CASE(MULHSU)
20901NODE_NAME_CASE(SHL_ADD)
20902NODE_NAME_CASE(SLLW)
20903NODE_NAME_CASE(SRAW)
20904NODE_NAME_CASE(SRLW)
20905NODE_NAME_CASE(DIVW)
20906NODE_NAME_CASE(DIVUW)
20907NODE_NAME_CASE(REMUW)
20908NODE_NAME_CASE(ROLW)
20909NODE_NAME_CASE(RORW)
20910NODE_NAME_CASE(CLZW)
20911NODE_NAME_CASE(CTZW)
20912NODE_NAME_CASE(ABSW)
20913NODE_NAME_CASE(FMV_H_X)
20914NODE_NAME_CASE(FMV_X_ANYEXTH)
20915NODE_NAME_CASE(FMV_X_SIGNEXTH)
20916NODE_NAME_CASE(FMV_W_X_RV64)
20917NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20918NODE_NAME_CASE(FCVT_X)
20919NODE_NAME_CASE(FCVT_XU)
20920NODE_NAME_CASE(FCVT_W_RV64)
20921NODE_NAME_CASE(FCVT_WU_RV64)
20922NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20923NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20924NODE_NAME_CASE(FROUND)
20925NODE_NAME_CASE(FCLASS)
20926NODE_NAME_CASE(FSGNJX)
20927NODE_NAME_CASE(FMAX)
20928NODE_NAME_CASE(FMIN)
20929NODE_NAME_CASE(FLI)
20930NODE_NAME_CASE(READ_COUNTER_WIDE)
20931NODE_NAME_CASE(BREV8)
20932NODE_NAME_CASE(ORC_B)
20933NODE_NAME_CASE(ZIP)
20934NODE_NAME_CASE(UNZIP)
20935NODE_NAME_CASE(CLMUL)
20936NODE_NAME_CASE(CLMULH)
20937NODE_NAME_CASE(CLMULR)
20938NODE_NAME_CASE(MOPR)
20939NODE_NAME_CASE(MOPRR)
20940NODE_NAME_CASE(SHA256SIG0)
20941NODE_NAME_CASE(SHA256SIG1)
20942NODE_NAME_CASE(SHA256SUM0)
20943NODE_NAME_CASE(SHA256SUM1)
20944NODE_NAME_CASE(SM4KS)
20945NODE_NAME_CASE(SM4ED)
20946NODE_NAME_CASE(SM3P0)
20947NODE_NAME_CASE(SM3P1)
20948NODE_NAME_CASE(TH_LWD)
20949NODE_NAME_CASE(TH_LWUD)
20950NODE_NAME_CASE(TH_LDD)
20951NODE_NAME_CASE(TH_SWD)
20952NODE_NAME_CASE(TH_SDD)
20953NODE_NAME_CASE(VMV_V_V_VL)
20954NODE_NAME_CASE(VMV_V_X_VL)
20955NODE_NAME_CASE(VFMV_V_F_VL)
20956NODE_NAME_CASE(VMV_X_S)
20957NODE_NAME_CASE(VMV_S_X_VL)
20958NODE_NAME_CASE(VFMV_S_F_VL)
20959NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20960NODE_NAME_CASE(READ_VLENB)
20961NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20962NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20963NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20964NODE_NAME_CASE(VSLIDEUP_VL)
20965NODE_NAME_CASE(VSLIDE1UP_VL)
20966NODE_NAME_CASE(VSLIDEDOWN_VL)
20967NODE_NAME_CASE(VSLIDE1DOWN_VL)
20968NODE_NAME_CASE(VFSLIDE1UP_VL)
20969NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20970NODE_NAME_CASE(VID_VL)
20971NODE_NAME_CASE(VFNCVT_ROD_VL)
20972NODE_NAME_CASE(VECREDUCE_ADD_VL)
20973NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20974NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20975NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20976NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20977NODE_NAME_CASE(VECREDUCE_AND_VL)
20978NODE_NAME_CASE(VECREDUCE_OR_VL)
20979NODE_NAME_CASE(VECREDUCE_XOR_VL)
20980NODE_NAME_CASE(VECREDUCE_FADD_VL)
20981NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20982NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20983NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20984NODE_NAME_CASE(ADD_VL)
20985NODE_NAME_CASE(AND_VL)
20986NODE_NAME_CASE(MUL_VL)
20987NODE_NAME_CASE(OR_VL)
20988NODE_NAME_CASE(SDIV_VL)
20989NODE_NAME_CASE(SHL_VL)
20990NODE_NAME_CASE(SREM_VL)
20991NODE_NAME_CASE(SRA_VL)
20992NODE_NAME_CASE(SRL_VL)
20993NODE_NAME_CASE(ROTL_VL)
20994NODE_NAME_CASE(ROTR_VL)
20995NODE_NAME_CASE(SUB_VL)
20996NODE_NAME_CASE(UDIV_VL)
20997NODE_NAME_CASE(UREM_VL)
20998NODE_NAME_CASE(XOR_VL)
20999NODE_NAME_CASE(AVGFLOORS_VL)
21000NODE_NAME_CASE(AVGFLOORU_VL)
21001NODE_NAME_CASE(AVGCEILS_VL)
21002NODE_NAME_CASE(AVGCEILU_VL)
21003NODE_NAME_CASE(SADDSAT_VL)
21004NODE_NAME_CASE(UADDSAT_VL)
21005NODE_NAME_CASE(SSUBSAT_VL)
21006NODE_NAME_CASE(USUBSAT_VL)
21007NODE_NAME_CASE(FADD_VL)
21008NODE_NAME_CASE(FSUB_VL)
21009NODE_NAME_CASE(FMUL_VL)
21010NODE_NAME_CASE(FDIV_VL)
21011NODE_NAME_CASE(FNEG_VL)
21012NODE_NAME_CASE(FABS_VL)
21013NODE_NAME_CASE(FSQRT_VL)
21014NODE_NAME_CASE(FCLASS_VL)
21015NODE_NAME_CASE(VFMADD_VL)
21016NODE_NAME_CASE(VFNMADD_VL)
21017NODE_NAME_CASE(VFMSUB_VL)
21018NODE_NAME_CASE(VFNMSUB_VL)
21019NODE_NAME_CASE(VFWMADD_VL)
21020NODE_NAME_CASE(VFWNMADD_VL)
21021NODE_NAME_CASE(VFWMSUB_VL)
21022NODE_NAME_CASE(VFWNMSUB_VL)
21023NODE_NAME_CASE(FCOPYSIGN_VL)
21024NODE_NAME_CASE(SMIN_VL)
21025NODE_NAME_CASE(SMAX_VL)
21026NODE_NAME_CASE(UMIN_VL)
21027NODE_NAME_CASE(UMAX_VL)
21028NODE_NAME_CASE(BITREVERSE_VL)
21029NODE_NAME_CASE(BSWAP_VL)
21030NODE_NAME_CASE(CTLZ_VL)
21031NODE_NAME_CASE(CTTZ_VL)
21032NODE_NAME_CASE(CTPOP_VL)
21033NODE_NAME_CASE(VFMIN_VL)
21034NODE_NAME_CASE(VFMAX_VL)
21035NODE_NAME_CASE(MULHS_VL)
21036NODE_NAME_CASE(MULHU_VL)
21037NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
21038NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
21039NODE_NAME_CASE(VFCVT_RM_X_F_VL)
21040NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
21041NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
21042NODE_NAME_CASE(SINT_TO_FP_VL)
21043NODE_NAME_CASE(UINT_TO_FP_VL)
21044NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
21045NODE_NAME_CASE(VFCVT_RM_F_X_VL)
21046NODE_NAME_CASE(FP_EXTEND_VL)
21047NODE_NAME_CASE(FP_ROUND_VL)
21048NODE_NAME_CASE(STRICT_FADD_VL)
21049NODE_NAME_CASE(STRICT_FSUB_VL)
21050NODE_NAME_CASE(STRICT_FMUL_VL)
21051NODE_NAME_CASE(STRICT_FDIV_VL)
21052NODE_NAME_CASE(STRICT_FSQRT_VL)
21053NODE_NAME_CASE(STRICT_VFMADD_VL)
21054NODE_NAME_CASE(STRICT_VFNMADD_VL)
21055NODE_NAME_CASE(STRICT_VFMSUB_VL)
21056NODE_NAME_CASE(STRICT_VFNMSUB_VL)
21057NODE_NAME_CASE(STRICT_FP_ROUND_VL)
21058NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
21059NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
21060NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
21061NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
21062NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
21063NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
21064NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
21065NODE_NAME_CASE(STRICT_FSETCC_VL)
21066NODE_NAME_CASE(STRICT_FSETCCS_VL)
21067NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
21068NODE_NAME_CASE(VWMUL_VL)
21069NODE_NAME_CASE(VWMULU_VL)
21070NODE_NAME_CASE(VWMULSU_VL)
21071NODE_NAME_CASE(VWADD_VL)
21072NODE_NAME_CASE(VWADDU_VL)
21073NODE_NAME_CASE(VWSUB_VL)
21074NODE_NAME_CASE(VWSUBU_VL)
21075NODE_NAME_CASE(VWADD_W_VL)
21076NODE_NAME_CASE(VWADDU_W_VL)
21077NODE_NAME_CASE(VWSUB_W_VL)
21078NODE_NAME_CASE(VWSUBU_W_VL)
21079NODE_NAME_CASE(VWSLL_VL)
21080NODE_NAME_CASE(VFWMUL_VL)
21081NODE_NAME_CASE(VFWADD_VL)
21082NODE_NAME_CASE(VFWSUB_VL)
21083NODE_NAME_CASE(VFWADD_W_VL)
21084NODE_NAME_CASE(VFWSUB_W_VL)
21085NODE_NAME_CASE(VWMACC_VL)
21086NODE_NAME_CASE(VWMACCU_VL)
21087NODE_NAME_CASE(VWMACCSU_VL)
21088NODE_NAME_CASE(SETCC_VL)
21089NODE_NAME_CASE(VMERGE_VL)
21090NODE_NAME_CASE(VMAND_VL)
21091NODE_NAME_CASE(VMOR_VL)
21092NODE_NAME_CASE(VMXOR_VL)
21093NODE_NAME_CASE(VMCLR_VL)
21094NODE_NAME_CASE(VMSET_VL)
21095NODE_NAME_CASE(VRGATHER_VX_VL)
21096NODE_NAME_CASE(VRGATHER_VV_VL)
21097NODE_NAME_CASE(VRGATHEREI16_VV_VL)
21098NODE_NAME_CASE(VSEXT_VL)
21099NODE_NAME_CASE(VZEXT_VL)
21100NODE_NAME_CASE(VCPOP_VL)
21101NODE_NAME_CASE(VFIRST_VL)
21102NODE_NAME_CASE(READ_CSR)
21103NODE_NAME_CASE(WRITE_CSR)
21104NODE_NAME_CASE(SWAP_CSR)
21105NODE_NAME_CASE(CZERO_EQZ)
21106NODE_NAME_CASE(CZERO_NEZ)
21107NODE_NAME_CASE(SW_GUARDED_BRIND)
21108NODE_NAME_CASE(SW_GUARDED_CALL)
21109NODE_NAME_CASE(SW_GUARDED_TAIL)
21110NODE_NAME_CASE(TUPLE_INSERT)
21111NODE_NAME_CASE(TUPLE_EXTRACT)
21112NODE_NAME_CASE(SF_VC_XV_SE)
21113NODE_NAME_CASE(SF_VC_IV_SE)
21114NODE_NAME_CASE(SF_VC_VV_SE)
21115NODE_NAME_CASE(SF_VC_FV_SE)
21116NODE_NAME_CASE(SF_VC_XVV_SE)
21117NODE_NAME_CASE(SF_VC_IVV_SE)
21118NODE_NAME_CASE(SF_VC_VVV_SE)
21119NODE_NAME_CASE(SF_VC_FVV_SE)
21120NODE_NAME_CASE(SF_VC_XVW_SE)
21121NODE_NAME_CASE(SF_VC_IVW_SE)
21122NODE_NAME_CASE(SF_VC_VVW_SE)
21123NODE_NAME_CASE(SF_VC_FVW_SE)
21124NODE_NAME_CASE(SF_VC_V_X_SE)
21125NODE_NAME_CASE(SF_VC_V_I_SE)
21126NODE_NAME_CASE(SF_VC_V_XV_SE)
21127NODE_NAME_CASE(SF_VC_V_IV_SE)
21128NODE_NAME_CASE(SF_VC_V_VV_SE)
21129NODE_NAME_CASE(SF_VC_V_FV_SE)
21130NODE_NAME_CASE(SF_VC_V_XVV_SE)
21131NODE_NAME_CASE(SF_VC_V_IVV_SE)
21132NODE_NAME_CASE(SF_VC_V_VVV_SE)
21133NODE_NAME_CASE(SF_VC_V_FVV_SE)
21134NODE_NAME_CASE(SF_VC_V_XVW_SE)
21135NODE_NAME_CASE(SF_VC_V_IVW_SE)
21136NODE_NAME_CASE(SF_VC_V_VVW_SE)
21137NODE_NAME_CASE(SF_VC_V_FVW_SE)
21138NODE_NAME_CASE(PROBED_ALLOCA)
21139 }
21140// clang-format on
21141returnnullptr;
21142#undef NODE_NAME_CASE
21143}
21144
21145/// getConstraintType - Given a constraint letter, return the type of
21146/// constraint it is for this target.
21147RISCVTargetLowering::ConstraintType
21148RISCVTargetLowering::getConstraintType(StringRef Constraint) const{
21149if (Constraint.size() == 1) {
21150switch (Constraint[0]) {
21151default:
21152break;
21153case'f':
21154case'R':
21155returnC_RegisterClass;
21156case'I':
21157case'J':
21158case'K':
21159returnC_Immediate;
21160case'A':
21161returnC_Memory;
21162case's':
21163case'S':// A symbolic address
21164returnC_Other;
21165 }
21166 }else {
21167if (Constraint =="vr" || Constraint =="vd" || Constraint =="vm")
21168returnC_RegisterClass;
21169if (Constraint =="cr" || Constraint =="cR" || Constraint =="cf")
21170returnC_RegisterClass;
21171 }
21172returnTargetLowering::getConstraintType(Constraint);
21173}
21174
21175std::pair<unsigned, const TargetRegisterClass *>
21176RISCVTargetLowering::getRegForInlineAsmConstraint(constTargetRegisterInfo *TRI,
21177StringRef Constraint,
21178MVT VT) const{
21179// First, see if this is a constraint that directly corresponds to a RISC-V
21180// register class.
21181if (Constraint.size() == 1) {
21182switch (Constraint[0]) {
21183case'r':
21184// TODO: Support fixed vectors up to XLen for P extension?
21185if (VT.isVector())
21186break;
21187if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21188return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21189if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21190return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21191if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21192return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21193return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21194case'f':
21195if (VT == MVT::f16) {
21196if (Subtarget.hasStdExtZfhmin())
21197return std::make_pair(0U, &RISCV::FPR16RegClass);
21198if (Subtarget.hasStdExtZhinxmin())
21199return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21200 }elseif (VT == MVT::f32) {
21201if (Subtarget.hasStdExtF())
21202return std::make_pair(0U, &RISCV::FPR32RegClass);
21203if (Subtarget.hasStdExtZfinx())
21204return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21205 }elseif (VT == MVT::f64) {
21206if (Subtarget.hasStdExtD())
21207return std::make_pair(0U, &RISCV::FPR64RegClass);
21208if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21209return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21210if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21211return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21212 }
21213break;
21214case'R':
21215return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21216default:
21217break;
21218 }
21219 }elseif (Constraint =="vr") {
21220for (constauto *RC :
21221 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
21222 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
21223 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
21224 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
21225 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
21226 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
21227 &RISCV::VRN2M4RegClass}) {
21228if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21229return std::make_pair(0U, RC);
21230 }
21231 }elseif (Constraint =="vd") {
21232for (constauto *RC :
21233 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
21234 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
21235 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
21236 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
21237 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
21238 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
21239 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
21240 &RISCV::VRN2M4NoV0RegClass}) {
21241if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21242return std::make_pair(0U, RC);
21243 }
21244 }elseif (Constraint =="vm") {
21245if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
21246return std::make_pair(0U, &RISCV::VMV0RegClass);
21247 }elseif (Constraint =="cr") {
21248if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21249return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21250if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21251return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21252if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21253return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21254if (!VT.isVector())
21255return std::make_pair(0U, &RISCV::GPRCRegClass);
21256 }elseif (Constraint =="cR") {
21257return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21258 }elseif (Constraint =="cf") {
21259if (VT == MVT::f16) {
21260if (Subtarget.hasStdExtZfhmin())
21261return std::make_pair(0U, &RISCV::FPR16CRegClass);
21262if (Subtarget.hasStdExtZhinxmin())
21263return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21264 }elseif (VT == MVT::f32) {
21265if (Subtarget.hasStdExtF())
21266return std::make_pair(0U, &RISCV::FPR32CRegClass);
21267if (Subtarget.hasStdExtZfinx())
21268return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21269 }elseif (VT == MVT::f64) {
21270if (Subtarget.hasStdExtD())
21271return std::make_pair(0U, &RISCV::FPR64CRegClass);
21272if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21273return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21274if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21275return std::make_pair(0U, &RISCV::GPRCRegClass);
21276 }
21277 }
21278
21279// Clang will correctly decode the usage of register name aliases into their
21280// official names. However, other frontends like `rustc` do not. This allows
21281// users of these frontends to use the ABI names for registers in LLVM-style
21282// register constraints.
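// Illustrative sketch (hypothetical user code, not from the original
// source): this allows inline asm such as
//   int x;
//   asm volatile("mv %0, zero" : "={a0}"(x));
// to work even when the frontend passes the ABI name "{a0}" through
// verbatim, since it is mapped to X10 below.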
21283unsigned XRegFromAlias =StringSwitch<unsigned>(Constraint.lower())
21284 .Case("{zero}", RISCV::X0)
21285 .Case("{ra}", RISCV::X1)
21286 .Case("{sp}", RISCV::X2)
21287 .Case("{gp}", RISCV::X3)
21288 .Case("{tp}", RISCV::X4)
21289 .Case("{t0}", RISCV::X5)
21290 .Case("{t1}", RISCV::X6)
21291 .Case("{t2}", RISCV::X7)
21292 .Cases("{s0}","{fp}", RISCV::X8)
21293 .Case("{s1}", RISCV::X9)
21294 .Case("{a0}", RISCV::X10)
21295 .Case("{a1}", RISCV::X11)
21296 .Case("{a2}", RISCV::X12)
21297 .Case("{a3}", RISCV::X13)
21298 .Case("{a4}", RISCV::X14)
21299 .Case("{a5}", RISCV::X15)
21300 .Case("{a6}", RISCV::X16)
21301 .Case("{a7}", RISCV::X17)
21302 .Case("{s2}", RISCV::X18)
21303 .Case("{s3}", RISCV::X19)
21304 .Case("{s4}", RISCV::X20)
21305 .Case("{s5}", RISCV::X21)
21306 .Case("{s6}", RISCV::X22)
21307 .Case("{s7}", RISCV::X23)
21308 .Case("{s8}", RISCV::X24)
21309 .Case("{s9}", RISCV::X25)
21310 .Case("{s10}", RISCV::X26)
21311 .Case("{s11}", RISCV::X27)
21312 .Case("{t3}", RISCV::X28)
21313 .Case("{t4}", RISCV::X29)
21314 .Case("{t5}", RISCV::X30)
21315 .Case("{t6}", RISCV::X31)
21316 .Default(RISCV::NoRegister);
21317if (XRegFromAlias != RISCV::NoRegister)
21318return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21319
21320// Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21321// TableGen record rather than the AsmName to choose registers for InlineAsm
21322// constraints, and because we want to match those names to the widest
21323// floating-point register type available, manually select floating-point registers here.
21324//
21325// The second case is the ABI name of the register, so that frontends can also
21326// use the ABI names in register constraint lists.
21327if (Subtarget.hasStdExtF()) {
21328unsigned FReg =StringSwitch<unsigned>(Constraint.lower())
21329 .Cases("{f0}","{ft0}", RISCV::F0_F)
21330 .Cases("{f1}","{ft1}", RISCV::F1_F)
21331 .Cases("{f2}","{ft2}", RISCV::F2_F)
21332 .Cases("{f3}","{ft3}", RISCV::F3_F)
21333 .Cases("{f4}","{ft4}", RISCV::F4_F)
21334 .Cases("{f5}","{ft5}", RISCV::F5_F)
21335 .Cases("{f6}","{ft6}", RISCV::F6_F)
21336 .Cases("{f7}","{ft7}", RISCV::F7_F)
21337 .Cases("{f8}","{fs0}", RISCV::F8_F)
21338 .Cases("{f9}","{fs1}", RISCV::F9_F)
21339 .Cases("{f10}","{fa0}", RISCV::F10_F)
21340 .Cases("{f11}","{fa1}", RISCV::F11_F)
21341 .Cases("{f12}","{fa2}", RISCV::F12_F)
21342 .Cases("{f13}","{fa3}", RISCV::F13_F)
21343 .Cases("{f14}","{fa4}", RISCV::F14_F)
21344 .Cases("{f15}","{fa5}", RISCV::F15_F)
21345 .Cases("{f16}","{fa6}", RISCV::F16_F)
21346 .Cases("{f17}","{fa7}", RISCV::F17_F)
21347 .Cases("{f18}","{fs2}", RISCV::F18_F)
21348 .Cases("{f19}","{fs3}", RISCV::F19_F)
21349 .Cases("{f20}","{fs4}", RISCV::F20_F)
21350 .Cases("{f21}","{fs5}", RISCV::F21_F)
21351 .Cases("{f22}","{fs6}", RISCV::F22_F)
21352 .Cases("{f23}","{fs7}", RISCV::F23_F)
21353 .Cases("{f24}","{fs8}", RISCV::F24_F)
21354 .Cases("{f25}","{fs9}", RISCV::F25_F)
21355 .Cases("{f26}","{fs10}", RISCV::F26_F)
21356 .Cases("{f27}","{fs11}", RISCV::F27_F)
21357 .Cases("{f28}","{ft8}", RISCV::F28_F)
21358 .Cases("{f29}","{ft9}", RISCV::F29_F)
21359 .Cases("{f30}","{ft10}", RISCV::F30_F)
21360 .Cases("{f31}","{ft11}", RISCV::F31_F)
21361 .Default(RISCV::NoRegister);
21362if (FReg != RISCV::NoRegister) {
21363assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F &&"Unknown fp-reg");
21364if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21365unsigned RegNo = FReg - RISCV::F0_F;
21366unsigned DReg = RISCV::F0_D + RegNo;
21367return std::make_pair(DReg, &RISCV::FPR64RegClass);
21368 }
21369if (VT == MVT::f32 || VT == MVT::Other)
21370return std::make_pair(FReg, &RISCV::FPR32RegClass);
21371if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21372unsigned RegNo = FReg - RISCV::F0_F;
21373unsigned HReg = RISCV::F0_H + RegNo;
21374return std::make_pair(HReg, &RISCV::FPR16RegClass);
21375 }
21376 }
21377 }
21378
21379if (Subtarget.hasVInstructions()) {
21380Register VReg =StringSwitch<Register>(Constraint.lower())
21381 .Case("{v0}", RISCV::V0)
21382 .Case("{v1}", RISCV::V1)
21383 .Case("{v2}", RISCV::V2)
21384 .Case("{v3}", RISCV::V3)
21385 .Case("{v4}", RISCV::V4)
21386 .Case("{v5}", RISCV::V5)
21387 .Case("{v6}", RISCV::V6)
21388 .Case("{v7}", RISCV::V7)
21389 .Case("{v8}", RISCV::V8)
21390 .Case("{v9}", RISCV::V9)
21391 .Case("{v10}", RISCV::V10)
21392 .Case("{v11}", RISCV::V11)
21393 .Case("{v12}", RISCV::V12)
21394 .Case("{v13}", RISCV::V13)
21395 .Case("{v14}", RISCV::V14)
21396 .Case("{v15}", RISCV::V15)
21397 .Case("{v16}", RISCV::V16)
21398 .Case("{v17}", RISCV::V17)
21399 .Case("{v18}", RISCV::V18)
21400 .Case("{v19}", RISCV::V19)
21401 .Case("{v20}", RISCV::V20)
21402 .Case("{v21}", RISCV::V21)
21403 .Case("{v22}", RISCV::V22)
21404 .Case("{v23}", RISCV::V23)
21405 .Case("{v24}", RISCV::V24)
21406 .Case("{v25}", RISCV::V25)
21407 .Case("{v26}", RISCV::V26)
21408 .Case("{v27}", RISCV::V27)
21409 .Case("{v28}", RISCV::V28)
21410 .Case("{v29}", RISCV::V29)
21411 .Case("{v30}", RISCV::V30)
21412 .Case("{v31}", RISCV::V31)
21413 .Default(RISCV::NoRegister);
21414if (VReg != RISCV::NoRegister) {
21415if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21416return std::make_pair(VReg, &RISCV::VMRegClass);
21417if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21418return std::make_pair(VReg, &RISCV::VRRegClass);
21419for (constauto *RC :
21420 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21421if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21422 VReg =TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21423return std::make_pair(VReg, RC);
21424 }
21425 }
21426 }
21427 }
21428
21429 std::pair<Register, const TargetRegisterClass *> Res =
21430TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
21431
21432// If we picked one of the Zfinx register classes, remap it to the GPR class.
21433// FIXME: When Zfinx is supported in CodeGen this will need to take the
21434// Subtarget into account.
21435if (Res.second == &RISCV::GPRF16RegClass ||
21436 Res.second == &RISCV::GPRF32RegClass ||
21437 Res.second == &RISCV::GPRPairRegClass)
21438return std::make_pair(Res.first, &RISCV::GPRRegClass);
21439
21440return Res;
21441}
21442
21443InlineAsm::ConstraintCode
21444RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const{
21445// Currently only support length 1 constraints.
21446if (ConstraintCode.size() == 1) {
21447switch (ConstraintCode[0]) {
21448case'A':
21449returnInlineAsm::ConstraintCode::A;
21450default:
21451break;
21452 }
21453 }
21454
21455returnTargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21456}
21457
21458voidRISCVTargetLowering::LowerAsmOperandForConstraint(
21459SDValueOp,StringRef Constraint, std::vector<SDValue> &Ops,
21460SelectionDAG &DAG) const{
21461// Currently only support length 1 constraints.
21462if (Constraint.size() == 1) {
21463switch (Constraint[0]) {
21464case'I':
21465// Validate & create a 12-bit signed immediate operand.
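// Illustrative sketch (hypothetical user code, not from the original
// source): 'I' accepts any constant in [-2048, 2047], e.g.
//   asm("addi %0, %1, %2" : "=r"(r) : "r"(a), "I"(42));
// while a constant such as 5000 fails the isInt<12> check below and no
// operand is created for it.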
21466if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21467uint64_t CVal =C->getSExtValue();
21468if (isInt<12>(CVal))
21469 Ops.push_back(DAG.getSignedTargetConstant(CVal,SDLoc(Op),
21470 Subtarget.getXLenVT()));
21471 }
21472return;
21473case'J':
21474// Validate & create an integer zero operand.
21475if (isNullConstant(Op))
21476 Ops.push_back(
21477 DAG.getTargetConstant(0,SDLoc(Op), Subtarget.getXLenVT()));
21478return;
21479case'K':
21480// Validate & create a 5-bit unsigned immediate operand.
21481if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21482uint64_t CVal =C->getZExtValue();
21483if (isUInt<5>(CVal))
21484 Ops.push_back(
21485 DAG.getTargetConstant(CVal,SDLoc(Op), Subtarget.getXLenVT()));
21486 }
21487return;
21488case'S':
21489TargetLowering::LowerAsmOperandForConstraint(Op,"s", Ops, DAG);
21490return;
21491default:
21492break;
21493 }
21494 }
21495TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21496}
21497
21498Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21499Instruction *Inst,
21500AtomicOrdering Ord) const{
21501if (Subtarget.hasStdExtZtso()) {
21502if (isa<LoadInst>(Inst) && Ord ==AtomicOrdering::SequentiallyConsistent)
21503return Builder.CreateFence(Ord);
21504returnnullptr;
21505 }
21506
21507if (isa<LoadInst>(Inst) && Ord ==AtomicOrdering::SequentiallyConsistent)
21508return Builder.CreateFence(Ord);
21509if (isa<StoreInst>(Inst) &&isReleaseOrStronger(Ord))
21510return Builder.CreateFence(AtomicOrdering::Release);
21511returnnullptr;
21512}
21513
21514Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21515Instruction *Inst,
21516AtomicOrdering Ord) const{
21517if (Subtarget.hasStdExtZtso()) {
21518if (isa<StoreInst>(Inst) && Ord ==AtomicOrdering::SequentiallyConsistent)
21519return Builder.CreateFence(Ord);
21520returnnullptr;
21521 }
21522
21523if (isa<LoadInst>(Inst) &&isAcquireOrStronger(Ord))
21524return Builder.CreateFence(AtomicOrdering::Acquire);
21525if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21526 Ord ==AtomicOrdering::SequentiallyConsistent)
21527return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
21528returnnullptr;
21529}
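// Illustrative sketch (assumed RVWMO lowering, not from the original
// source): with the leading/trailing fences above and no Ztso, a seq_cst
// load is bracketed roughly as
//   fence rw, rw
//   lw    a0, 0(a1)
//   fence r, rw
// whereas under Ztso only the sequentially consistent cases emit a fence.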
21530
21531TargetLowering::AtomicExpansionKind
21532RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const{
21533// Floating-point atomicrmw operations (e.g. fadd, fsub) must be expanded to
21534// use compare-exchange, as floating-point operations can't be used in an
21535// lr/sc sequence without breaking the forward-progress guarantee.
21536if (AI->isFloatingPointOperation() ||
21537 AI->getOperation() ==AtomicRMWInst::UIncWrap ||
21538 AI->getOperation() ==AtomicRMWInst::UDecWrap ||
21539 AI->getOperation() ==AtomicRMWInst::USubCond ||
21540 AI->getOperation() ==AtomicRMWInst::USubSat)
21541returnAtomicExpansionKind::CmpXChg;
21542
21543// Don't expand forced atomics, we want to have __sync libcalls instead.
21544if (Subtarget.hasForcedAtomics())
21545returnAtomicExpansionKind::None;
21546
21547unsignedSize = AI->getType()->getPrimitiveSizeInBits();
21548if (AI->getOperation() ==AtomicRMWInst::Nand) {
21549if (Subtarget.hasStdExtZacas() &&
21550 (Size >= 32 || Subtarget.hasStdExtZabha()))
21551returnAtomicExpansionKind::CmpXChg;
21552if (Size < 32)
21553returnAtomicExpansionKind::MaskedIntrinsic;
21554 }
21555
21556if (Size < 32 && !Subtarget.hasStdExtZabha())
21557returnAtomicExpansionKind::MaskedIntrinsic;
21558
21559returnAtomicExpansionKind::None;
21560}
21561
21562staticIntrinsic::ID
21563getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen,AtomicRMWInst::BinOp BinOp) {
21564if (XLen == 32) {
21565switch (BinOp) {
21566default:
21567llvm_unreachable("Unexpected AtomicRMW BinOp");
21568caseAtomicRMWInst::Xchg:
21569return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21570caseAtomicRMWInst::Add:
21571return Intrinsic::riscv_masked_atomicrmw_add_i32;
21572caseAtomicRMWInst::Sub:
21573return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21574caseAtomicRMWInst::Nand:
21575return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21576caseAtomicRMWInst::Max:
21577return Intrinsic::riscv_masked_atomicrmw_max_i32;
21578caseAtomicRMWInst::Min:
21579return Intrinsic::riscv_masked_atomicrmw_min_i32;
21580caseAtomicRMWInst::UMax:
21581return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21582caseAtomicRMWInst::UMin:
21583return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21584 }
21585 }
21586
21587if (XLen == 64) {
21588switch (BinOp) {
21589default:
21590llvm_unreachable("Unexpected AtomicRMW BinOp");
21591caseAtomicRMWInst::Xchg:
21592return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21593caseAtomicRMWInst::Add:
21594return Intrinsic::riscv_masked_atomicrmw_add_i64;
21595caseAtomicRMWInst::Sub:
21596return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21597caseAtomicRMWInst::Nand:
21598return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21599caseAtomicRMWInst::Max:
21600return Intrinsic::riscv_masked_atomicrmw_max_i64;
21601caseAtomicRMWInst::Min:
21602return Intrinsic::riscv_masked_atomicrmw_min_i64;
21603caseAtomicRMWInst::UMax:
21604return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21605caseAtomicRMWInst::UMin:
21606return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21607 }
21608 }
21609
21610llvm_unreachable("Unexpected XLen\n");
21611}
21612
21613Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
21614IRBuilderBase &Builder,AtomicRMWInst *AI,Value *AlignedAddr,Value *Incr,
21615Value *Mask,Value *ShiftAmt,AtomicOrdering Ord) const{
21616// In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21617// the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21618// mask, as this produces better code than the LR/SC loop emitted by
21619// int_riscv_masked_atomicrmw_xchg.
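// Illustrative sketch (not from the original source): a masked
//   atomicrmw xchg ptr %p, i8 0 seq_cst
// becomes roughly
//   atomicrmw and ptr %AlignedAddr, i32 %InvMask seq_cst
// where %InvMask clears only the byte of interest, matching the
// CreateNot(Mask) call below.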
21620if (AI->getOperation() ==AtomicRMWInst::Xchg &&
21621 isa<ConstantInt>(AI->getValOperand())) {
21622ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21623if (CVal->isZero())
21624return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21625 Builder.CreateNot(Mask,"Inv_Mask"),
21626 AI->getAlign(), Ord);
21627if (CVal->isMinusOne())
21628return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21629 AI->getAlign(), Ord);
21630 }
21631
21632unsigned XLen = Subtarget.getXLen();
21633Value *Ordering =
21634 Builder.getIntN(XLen,static_cast<uint64_t>(AI->getOrdering()));
21635Type *Tys[] = {AlignedAddr->getType()};
21636Function *LrwOpScwLoop =Intrinsic::getOrInsertDeclaration(
21637 AI->getModule(),
21638getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
21639
21640if (XLen == 64) {
21641 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21642 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21643 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21644 }
21645
21646Value *Result;
21647
21648// Must pass the shift amount needed to sign extend the loaded value prior
21649// to performing a signed comparison for min/max. ShiftAmt is the number of
21650// bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21651// is the number of bits to left+right shift the value in order to
21652// sign-extend.
21653if (AI->getOperation() ==AtomicRMWInst::Min ||
21654 AI->getOperation() ==AtomicRMWInst::Max) {
21655constDataLayout &DL = AI->getDataLayout();
21656unsigned ValWidth =
21657DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21658Value *SextShamt =
21659 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21660 Result = Builder.CreateCall(LrwOpScwLoop,
21661 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21662 }else {
21663 Result =
21664 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21665 }
21666
21667if (XLen == 64)
21668 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21669return Result;
21670}
21671
21672TargetLowering::AtomicExpansionKind
21673RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
21674AtomicCmpXchgInst *CI) const{
21675// Don't expand forced atomics, we want to have __sync libcalls instead.
21676if (Subtarget.hasForcedAtomics())
21677returnAtomicExpansionKind::None;
21678
21679unsignedSize = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
21680if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21681 (Size == 8 ||Size == 16))
21682returnAtomicExpansionKind::MaskedIntrinsic;
21683returnAtomicExpansionKind::None;
21684}
21685
21686Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
21687IRBuilderBase &Builder,AtomicCmpXchgInst *CI,Value *AlignedAddr,
21688Value *CmpVal,Value *NewVal,Value *Mask,AtomicOrdering Ord) const{
21689unsigned XLen = Subtarget.getXLen();
21690Value *Ordering = Builder.getIntN(XLen,static_cast<uint64_t>(Ord));
21691Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21692if (XLen == 64) {
21693 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21694 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21695 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21696 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21697 }
21698Type *Tys[] = {AlignedAddr->getType()};
21699Value *Result = Builder.CreateIntrinsic(
21700 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21701if (XLen == 64)
21702 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21703return Result;
21704}
21705
21706boolRISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
21707EVT DataVT) const{
21708// We have indexed loads for all supported EEW types. Indices are always
21709// zero extended.
21710return Extend.getOpcode() ==ISD::ZERO_EXTEND &&
21711isTypeLegal(Extend.getValueType()) &&
21712isTypeLegal(Extend.getOperand(0).getValueType()) &&
21713 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21714}
21715
21716boolRISCVTargetLowering::shouldConvertFpToSat(unsignedOp,EVT FPVT,
21717EVT VT) const{
21718if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21719returnfalse;
21720
21721switch (FPVT.getSimpleVT().SimpleTy) {
21722case MVT::f16:
21723return Subtarget.hasStdExtZfhmin();
21724case MVT::f32:
21725return Subtarget.hasStdExtF();
21726case MVT::f64:
21727return Subtarget.hasStdExtD();
21728default:
21729returnfalse;
21730 }
21731}
21732
21733unsignedRISCVTargetLowering::getJumpTableEncoding() const{
21734// If we are using the small code model, we can reduce the size of each
21735// jump table entry to 4 bytes.
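// Illustrative sketch (assumption about the resulting encoding, not from
// the original source): with EK_Custom32 each jump table entry is a 32-bit
// absolute block address, e.g.
//   .word .LBB0_4
// rather than a 64-bit pointer-sized entry.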
21736if (Subtarget.is64Bit() && !isPositionIndependent() &&
21737getTargetMachine().getCodeModel() ==CodeModel::Small) {
21738returnMachineJumpTableInfo::EK_Custom32;
21739 }
21740returnTargetLowering::getJumpTableEncoding();
21741}
21742
21743constMCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21744constMachineJumpTableInfo *MJTI,constMachineBasicBlock *MBB,
21745unsigned uid,MCContext &Ctx) const{
21746assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21747getTargetMachine().getCodeModel() ==CodeModel::Small);
21748returnMCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21749}
21750
21751boolRISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const{
21752// We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21753// of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21754// a power of two as well.
21755// FIXME: This doesn't work for zve32, but that's already broken
21756// elsewhere for the same reason.
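// Illustrative sketch (not from the original source): with VLEN = 128,
// vscale = 128 / RISCV::RVVBitsPerBlock = 2; with VLEN = 512, vscale = 8.
// Both are powers of two, as required here.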
21757assert(Subtarget.getRealMinVLen() >= 64 &&"zve32* unsupported");
21758static_assert(RISCV::RVVBitsPerBlock == 64,
21759"RVVBitsPerBlock changed, audit needed");
21760returntrue;
21761}
21762
21763boolRISCVTargetLowering::getIndexedAddressParts(SDNode *Op,SDValue &Base,
21764SDValue &Offset,
21765ISD::MemIndexedMode &AM,
21766SelectionDAG &DAG) const{
21767// Target does not support indexed loads.
21768if (!Subtarget.hasVendorXTHeadMemIdx())
21769returnfalse;
21770
21771if (Op->getOpcode() !=ISD::ADD &&Op->getOpcode() !=ISD::SUB)
21772returnfalse;
21773
21774Base =Op->getOperand(0);
21775if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21776 int64_t RHSC =RHS->getSExtValue();
21777if (Op->getOpcode() ==ISD::SUB)
21778 RHSC = -(uint64_t)RHSC;
21779
21780// The constants that can be encoded in the THeadMemIdx instructions
21781// are of the form (sign_extend(imm5) << imm2).
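// Illustrative sketch (not from the original source): offsets like
//   40 == 5 << 3  -> encodable (imm5 = 5, imm2 = 3)
//   17            -> not encodable (no shift makes it fit the form)
// are what the loop below accepts or rejects.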
21782bool isLegalIndexedOffset =false;
21783for (unsigned i = 0; i < 4; i++)
21784if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21785 isLegalIndexedOffset =true;
21786break;
21787 }
21788
21789if (!isLegalIndexedOffset)
21790returnfalse;
21791
21792Offset =Op->getOperand(1);
21793returntrue;
21794 }
21795
21796returnfalse;
21797}
21798
21799boolRISCVTargetLowering::getPreIndexedAddressParts(SDNode *N,SDValue &Base,
21800SDValue &Offset,
21801ISD::MemIndexedMode &AM,
21802SelectionDAG &DAG) const{
21803EVT VT;
21804SDValuePtr;
21805if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21806 VT = LD->getMemoryVT();
21807Ptr = LD->getBasePtr();
21808 }elseif (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21809 VT = ST->getMemoryVT();
21810Ptr = ST->getBasePtr();
21811 }else
21812returnfalse;
21813
21814if (!getIndexedAddressParts(Ptr.getNode(),Base,Offset, AM, DAG))
21815returnfalse;
21816
21817 AM =ISD::PRE_INC;
21818returntrue;
21819}
21820
21821boolRISCVTargetLowering::getPostIndexedAddressParts(SDNode *N,SDNode *Op,
21822SDValue &Base,
21823SDValue &Offset,
21824ISD::MemIndexedMode &AM,
21825SelectionDAG &DAG) const{
21826if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21827if (Op->getOpcode() !=ISD::ADD)
21828returnfalse;
21829
21830if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21831Base = LS->getBasePtr();
21832else
21833returnfalse;
21834
21835if (Base ==Op->getOperand(0))
21836Offset =Op->getOperand(1);
21837elseif (Base ==Op->getOperand(1))
21838Offset =Op->getOperand(0);
21839else
21840returnfalse;
21841
21842 AM =ISD::POST_INC;
21843returntrue;
21844 }
21845
21846EVT VT;
21847SDValuePtr;
21848if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21849 VT = LD->getMemoryVT();
21850Ptr = LD->getBasePtr();
21851 }elseif (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21852 VT = ST->getMemoryVT();
21853Ptr = ST->getBasePtr();
21854 }else
21855returnfalse;
21856
21857if (!getIndexedAddressParts(Op,Base,Offset, AM, DAG))
21858returnfalse;
21859// Post-indexing updates the base register, so the transform is only valid
21860// when the add's base operand is the same as the load/store's pointer.
21861if (Ptr !=Base)
21862returnfalse;
21863
21864 AM =ISD::POST_INC;
21865returntrue;
21866}
21867
21868boolRISCVTargetLowering::isFMAFasterThanFMulAndFAdd(constMachineFunction &MF,
21869EVT VT) const{
21870EVT SVT = VT.getScalarType();
21871
21872if (!SVT.isSimple())
21873returnfalse;
21874
21875switch (SVT.getSimpleVT().SimpleTy) {
21876case MVT::f16:
21877return VT.isVector() ? Subtarget.hasVInstructionsF16()
21878 : Subtarget.hasStdExtZfhOrZhinx();
21879case MVT::f32:
21880return Subtarget.hasStdExtFOrZfinx();
21881case MVT::f64:
21882return Subtarget.hasStdExtDOrZdinx();
21883default:
21884break;
21885 }
21886
21887returnfalse;
21888}
21889
21890ISD::NodeTypeRISCVTargetLowering::getExtendForAtomicCmpSwapArg() const{
21891// With Zacas we use amocas.w, which does not require the compare argument to be extended.
21892return Subtarget.hasStdExtZacas() ?ISD::ANY_EXTEND :ISD::SIGN_EXTEND;
21893}
21894
21895RegisterRISCVTargetLowering::getExceptionPointerRegister(
21896constConstant *PersonalityFn) const{
21897return RISCV::X10;
21898}
21899
21900RegisterRISCVTargetLowering::getExceptionSelectorRegister(
21901constConstant *PersonalityFn) const{
21902return RISCV::X11;
21903}
21904
21905boolRISCVTargetLowering::shouldExtendTypeInLibCall(EVTType) const{
21906// Return false to suppress the unnecessary extensions if the LibCall
21907// arguments or return value is a float narrower than XLEN on a soft FP ABI.
21908if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21909Type.getSizeInBits() < Subtarget.getXLen()))
21910returnfalse;
21911
21912returntrue;
21913}
21914
21915boolRISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
21916bool IsSigned) const{
21917if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21918returntrue;
21919
21920return IsSigned;
21921}
21922
21923boolRISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context,EVT VT,
21924SDValueC) const{
21925// Check integral scalar types.
21926if (!VT.isScalarInteger())
21927returnfalse;
21928
21929// Omit the optimization if the subtarget has the Zmmul extension and the
21930// data size exceeds XLen.
21931constbool HasZmmul = Subtarget.hasStdExtZmmul();
21932if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21933returnfalse;
21934
21935auto *ConstNode = cast<ConstantSDNode>(C);
21936constAPInt &Imm = ConstNode->getAPIntValue();
21937
21938// Break the MUL to a SLLI and an ADD/SUB.
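// For illustration: x * 9 becomes (x << 3) + x and x * 7 becomes
// (x << 3) - x, each a single shift plus one add/sub.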
21939if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21940 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21941returntrue;
21942
21943// Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
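// For illustration (requires Zba): x * 4100 becomes sh2add(x, x << 12),
// i.e. (x << 2) + (x << 12), since 4100 - 4 is a power of 2 and 4100 does
// not fit in a signed 12-bit immediate.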
21944if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21945 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21946 (Imm - 8).isPowerOf2()))
21947returntrue;
21948
21949// Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21950// a pair of LUI/ADDI.
21951if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21952 ConstNode->hasOneUse()) {
21953APInt ImmS = Imm.ashr(Imm.countr_zero());
21954if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21955 (1 - ImmS).isPowerOf2())
21956returntrue;
21957 }
21958
21959returnfalse;
21960}
21961
21962boolRISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
21963SDValue ConstNode) const{
21964// Let the DAGCombiner decide for vectors.
21965EVT VT = AddNode.getValueType();
21966if (VT.isVector())
21967returntrue;
21968
21969// Let the DAGCombiner decide for larger types.
21970if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21971returntrue;
21972
21973// It is worse if c1 is simm12 while c1*c2 is not.
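// For illustration: folding (x + 8) * 1000 into x * 1000 + 8000 would turn
// a single addi (8 fits in simm12) into a constant 8000 that does not, so
// the fold is rejected; (x + 3) * 100 is fine because 300 still fits.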
21974ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21975ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21976constAPInt &C1 = C1Node->getAPIntValue();
21977constAPInt &C2 = C2Node->getAPIntValue();
21978if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21979returnfalse;
21980
21981// Default to true and let the DAGCombiner decide.
21982returntrue;
21983}
21984
21985boolRISCVTargetLowering::allowsMisalignedMemoryAccesses(
21986EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,
21987unsigned *Fast) const{
21988if (!VT.isVector()) {
21989if (Fast)
21990 *Fast = Subtarget.enableUnalignedScalarMem();
21991return Subtarget.enableUnalignedScalarMem();
21992 }
21993
21994// All vector implementations must support element alignment
21995EVT ElemVT = VT.getVectorElementType();
21996if (Alignment >= ElemVT.getStoreSize()) {
21997if (Fast)
21998 *Fast = 1;
21999returntrue;
22000 }
22001
22002// Note: We lower an unmasked unaligned vector access to an equally sized
22003// e8 element type access. Given this, we effectively support all unmasked
22004// misaligned accesses. TODO: Work through the codegen implications of
22005// allowing such accesses to be formed, and considered fast.
22006if (Fast)
22007 *Fast = Subtarget.enableUnalignedVectorMem();
22008return Subtarget.enableUnalignedVectorMem();
22009}
22010
22011
22012EVTRISCVTargetLowering::getOptimalMemOpType(constMemOp &Op,
22013constAttributeList &FuncAttributes) const{
22014if (!Subtarget.hasVInstructions())
22015return MVT::Other;
22016
22017if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
22018return MVT::Other;
22019
22020// We use LMUL1 memory operations here for a non-obvious reason. Our caller
22021// has an expansion threshold, and we want the number of hardware memory
22022// operations to correspond roughly to that threshold. LMUL>1 operations
22023// are typically expanded linearly internally, and thus correspond to more
22024// than one actual memory operation. Note that store merging and load
22025// combining will typically form larger LMUL operations from the LMUL1
22026// operations emitted here, and that's okay because combining isn't
22027// introducing new memory operations; it's just merging existing ones.
22028constunsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
22029if (Op.size() < MinVLenInBytes)
22030// TODO: Figure out short memops. For the moment, do the default thing
22031// which ends up using scalar sequences.
22032return MVT::Other;
22033
22034// If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
22035// fixed vectors.
22036if (MinVLenInBytes <=RISCV::RVVBitsPerBlock / 8)
22037return MVT::Other;
22038
22039// Prefer i8 for non-zero memset as it allows us to avoid materializing
22040// a large scalar constant and instead use vmv.v.x/i to do the
22041// broadcast. For everything else, prefer ELenVT to minimize VL and thus
22042// maximize the chance we can encode the size in the vsetvli.
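// For illustration, assuming VLEN=128 and ELEN=64 with sufficient
// alignment: a non-zero memset of at least 16 bytes prefers v16i8, while
// a memcpy of the same size prefers v2i64.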
22043MVT ELenVT =MVT::getIntegerVT(Subtarget.getELen());
22044MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
22045
22046// Do we have sufficient alignment for our preferred VT? If not, revert
22047// to largest size allowed by our alignment criteria.
22048if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
22049Align RequiredAlign(PreferredVT.getStoreSize());
22050if (Op.isFixedDstAlign())
22051 RequiredAlign = std::min(RequiredAlign,Op.getDstAlign());
22052if (Op.isMemcpy())
22053 RequiredAlign = std::min(RequiredAlign,Op.getSrcAlign());
22054 PreferredVT =MVT::getIntegerVT(RequiredAlign.value() * 8);
22055 }
22056returnMVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
22057}
22058
22059boolRISCVTargetLowering::splitValueIntoRegisterParts(
22060SelectionDAG &DAG,constSDLoc &DL,SDValue Val,SDValue *Parts,
22061unsigned NumParts,MVT PartVT, std::optional<CallingConv::ID>CC) const{
22062bool IsABIRegCopy =CC.has_value();
22063EVT ValueVT = Val.getValueType();
22064
22065MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22066if ((ValueVT == PairVT ||
22067 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22068 ValueVT == MVT::f64)) &&
22069 NumParts == 1 && PartVT == MVT::Untyped) {
22070// Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
22071MVT XLenVT = Subtarget.getXLenVT();
22072if (ValueVT == MVT::f64)
22073 Val = DAG.getBitcast(MVT::i64, Val);
22074auto [Lo,Hi] = DAG.SplitScalar(Val,DL, XLenVT, XLenVT);
22075// Always creating an MVT::Untyped part, so always use
22076// RISCVISD::BuildGPRPair.
22077 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair,DL, PartVT,Lo,Hi);
22078returntrue;
22079 }
22080
22081if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22082 PartVT == MVT::f32) {
22083// Cast the [b]f16 to i16, extend to i32, pad the upper half with ones to
22084// make a float NaN, and cast to f32.
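// For illustration: the f16 value 1.0 (bits 0x3C00) becomes the f32 bit
// pattern 0xFFFF3C00, a NaN whose low half holds the original f16 bits
// (the usual NaN-boxed representation).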
22085 Val = DAG.getNode(ISD::BITCAST,DL, MVT::i16, Val);
22086 Val = DAG.getNode(ISD::ANY_EXTEND,DL, MVT::i32, Val);
22087 Val = DAG.getNode(ISD::OR,DL, MVT::i32, Val,
22088 DAG.getConstant(0xFFFF0000,DL, MVT::i32));
22089 Val = DAG.getNode(ISD::BITCAST,DL, PartVT, Val);
22090 Parts[0] = Val;
22091returntrue;
22092 }
22093
22094if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
22095#ifndef NDEBUG
22096unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
22097 [[maybe_unused]]unsigned ValLMUL =
22098divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
22099 ValNF *RISCV::RVVBitsPerBlock);
22100unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
22101 [[maybe_unused]]unsigned PartLMUL =
22102divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
22103 PartNF *RISCV::RVVBitsPerBlock);
22104assert(ValNF == PartNF && ValLMUL == PartLMUL &&
22105"RISC-V vector tuple type only accepts same register class type "
22106"TUPLE_INSERT");
22107#endif
22108
22109 Val = DAG.getNode(RISCVISD::TUPLE_INSERT,DL, PartVT, DAG.getUNDEF(PartVT),
22110 Val, DAG.getVectorIdxConstant(0,DL));
22111 Parts[0] = Val;
22112returntrue;
22113 }
22114
22115if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22116LLVMContext &Context = *DAG.getContext();
22117EVT ValueEltVT = ValueVT.getVectorElementType();
22118EVT PartEltVT = PartVT.getVectorElementType();
22119unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22120unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22121if (PartVTBitSize % ValueVTBitSize == 0) {
22122assert(PartVTBitSize >= ValueVTBitSize);
22123// If the element types are different, bitcast to the same element type of
22124// PartVT first.
22125// For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
22126// we first widen <vscale x 1 x i8> to <vscale x 8 x i8> by inserting it
22127// into an undef vector with insert_subvector, and then bitcast the result
22128// to <vscale x 4 x i16>.
22129if (ValueEltVT != PartEltVT) {
22130if (PartVTBitSize > ValueVTBitSize) {
22131unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22132 assert(Count != 0 && "The number of elements should not be zero.");
22133EVT SameEltTypeVT =
22134EVT::getVectorVT(Context, ValueEltVT, Count,/*IsScalable=*/true);
22135 Val = DAG.getNode(ISD::INSERT_SUBVECTOR,DL, SameEltTypeVT,
22136 DAG.getUNDEF(SameEltTypeVT), Val,
22137 DAG.getVectorIdxConstant(0,DL));
22138 }
22139 Val = DAG.getNode(ISD::BITCAST,DL, PartVT, Val);
22140 }else {
22141 Val =
22142 DAG.getNode(ISD::INSERT_SUBVECTOR,DL, PartVT, DAG.getUNDEF(PartVT),
22143 Val, DAG.getVectorIdxConstant(0,DL));
22144 }
22145 Parts[0] = Val;
22146returntrue;
22147 }
22148 }
22149
22150returnfalse;
22151}
22152
22153SDValueRISCVTargetLowering::joinRegisterPartsIntoValue(
22154SelectionDAG &DAG,constSDLoc &DL,constSDValue *Parts,unsigned NumParts,
22155MVT PartVT,EVT ValueVT, std::optional<CallingConv::ID>CC) const{
22156bool IsABIRegCopy =CC.has_value();
22157
22158MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22159if ((ValueVT == PairVT ||
22160 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22161 ValueVT == MVT::f64)) &&
22162 NumParts == 1 && PartVT == MVT::Untyped) {
22163// Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
22164MVT XLenVT = Subtarget.getXLenVT();
22165
22166SDValue Val = Parts[0];
22167// Always starting with an MVT::Untyped part, so always use
22168// RISCVISD::SplitGPRPair
22169 Val = DAG.getNode(RISCVISD::SplitGPRPair,DL, DAG.getVTList(XLenVT, XLenVT),
22170 Val);
22171 Val = DAG.getNode(ISD::BUILD_PAIR,DL, PairVT, Val.getValue(0),
22172 Val.getValue(1));
22173if (ValueVT == MVT::f64)
22174 Val = DAG.getBitcast(ValueVT, Val);
22175return Val;
22176 }
22177
22178if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22179 PartVT == MVT::f32) {
22180SDValue Val = Parts[0];
22181
22182// Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
22183 Val = DAG.getNode(ISD::BITCAST,DL, MVT::i32, Val);
22184 Val = DAG.getNode(ISD::TRUNCATE,DL, MVT::i16, Val);
22185 Val = DAG.getNode(ISD::BITCAST,DL, ValueVT, Val);
22186return Val;
22187 }
22188
22189if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22190LLVMContext &Context = *DAG.getContext();
22191SDValue Val = Parts[0];
22192EVT ValueEltVT = ValueVT.getVectorElementType();
22193EVT PartEltVT = PartVT.getVectorElementType();
22194unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22195unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22196if (PartVTBitSize % ValueVTBitSize == 0) {
22197assert(PartVTBitSize >= ValueVTBitSize);
22198EVT SameEltTypeVT = ValueVT;
22199// If the element types are different, first bitcast the part to a vector
22200// with ValueVT's element type but PartVT's total size.
22201// For example, to extract a <vscale x 1 x i8> value from a
22202// <vscale x 4 x i16> part, we first bitcast the <vscale x 4 x i16> part
22203// to <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
22204// subvector from it.
22205if (ValueEltVT != PartEltVT) {
22206unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22207 assert(Count != 0 && "The number of elements should not be zero.");
22208 SameEltTypeVT =
22209EVT::getVectorVT(Context, ValueEltVT, Count,/*IsScalable=*/true);
22210 Val = DAG.getNode(ISD::BITCAST,DL, SameEltTypeVT, Val);
22211 }
22212 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR,DL, ValueVT, Val,
22213 DAG.getVectorIdxConstant(0,DL));
22214return Val;
22215 }
22216 }
22217returnSDValue();
22218}
22219
22220boolRISCVTargetLowering::isIntDivCheap(EVT VT,AttributeList Attr) const{
22221// When aggressively optimizing for code size, we prefer to use a div
22222// instruction, as it is usually smaller than the alternative sequence.
22223// TODO: Add vector division?
22224bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
22225return OptSize && !VT.isVector();
22226}
22227
22228boolRISCVTargetLowering::preferScalarizeSplat(SDNode *N) const{
22229// Scalarizing a splat of zero_ext or sign_ext might prevent it from
22230// matching a widening instruction in some situations.
22231unsigned Opc =N->getOpcode();
22232if (Opc ==ISD::ZERO_EXTEND || Opc ==ISD::SIGN_EXTEND)
22233returnfalse;
22234returntrue;
22235}
22236
22237staticValue *useTpOffset(IRBuilderBase &IRB,unsignedOffset) {
22238Module *M = IRB.GetInsertBlock()->getModule();
22239Function *ThreadPointerFunc =
22240Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
22241return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
22242 IRB.CreateCall(ThreadPointerFunc),Offset);
22243}
22244
22245Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const{
22246// Fuchsia provides a fixed TLS slot for the stack cookie.
22247// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
22248if (Subtarget.isTargetFuchsia())
22249returnuseTpOffset(IRB, -0x10);
22250
22251// Android provides a fixed TLS slot for the stack cookie. See the definition
22252// of TLS_SLOT_STACK_GUARD in
22253// https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
22254if (Subtarget.isTargetAndroid())
22255returnuseTpOffset(IRB, -0x18);
22256
22257Module *M = IRB.GetInsertBlock()->getModule();
22258
22259if (M->getStackProtectorGuard() =="tls") {
22260// Users must specify the offset explicitly
22261intOffset = M->getStackProtectorGuardOffset();
22262returnuseTpOffset(IRB,Offset);
22263 }
22264
22265returnTargetLowering::getIRStackGuard(IRB);
22266}
22267
22268boolRISCVTargetLowering::isLegalInterleavedAccessType(
22269VectorType *VTy,unsigned Factor,Align Alignment,unsigned AddrSpace,
22270constDataLayout &DL) const{
22271EVT VT =getValueType(DL, VTy);
22272// Don't lower vlseg/vsseg for vector types that can't be split.
22273if (!isTypeLegal(VT))
22274returnfalse;
22275
22276if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
22277 !allowsMemoryAccessForAlignment(VTy->getContext(),DL, VT, AddrSpace,
22278 Alignment))
22279returnfalse;
22280
22281MVT ContainerVT = VT.getSimpleVT();
22282
22283if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22284if (!Subtarget.useRVVForFixedLengthVectors())
22285returnfalse;
22286// Sometimes the interleaved access pass picks up splats as interleaves of
22287// one element. Don't lower these.
22288if (FVTy->getNumElements() < 2)
22289returnfalse;
22290
22291 ContainerVT =getContainerForFixedLengthVector(VT.getSimpleVT());
22292 }else {
22293// The intrinsics for scalable vectors are not overloaded on pointer type
22294// and can only handle the default address space.
22295if (AddrSpace)
22296returnfalse;
22297 }
22298
22299// Need to make sure that EMUL * NFIELDS ≤ 8
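// For illustration: a factor-4 access of <vscale x 4 x i32> uses LMUL=2
// per field, so EMUL * NFIELDS = 8 and is accepted, whereas factor 5 at
// LMUL=2 would give 10 and is rejected.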
22300auto [LMUL, Fractional] =RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22301if (Fractional)
22302returntrue;
22303return Factor * LMUL <= 8;
22304}
22305
22306boolRISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
22307Align Alignment) const{
22308if (!Subtarget.hasVInstructions())
22309returnfalse;
22310
22311// Only support fixed vectors if we know the minimum vector size.
22312if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
22313returnfalse;
22314
22315EVT ScalarType = DataType.getScalarType();
22316if (!isLegalElementTypeForRVV(ScalarType))
22317returnfalse;
22318
22319if (!Subtarget.enableUnalignedVectorMem() &&
22320 Alignment < ScalarType.getStoreSize())
22321returnfalse;
22322
22323returntrue;
22324}
22325
22326staticconstIntrinsic::IDFixedVlsegIntrIds[] = {
22327 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22328 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22329 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22330 Intrinsic::riscv_seg8_load};
22331
22332/// Lower an interleaved load into a vlsegN intrinsic.
22333///
22334/// E.g. Lower an interleaved load (Factor = 2):
22335/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22336/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
22337/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
22338///
22339/// Into:
22340/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22341/// %ptr, i64 4)
22342/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22343/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
22344boolRISCVTargetLowering::lowerInterleavedLoad(
22345LoadInst *LI,ArrayRef<ShuffleVectorInst *> Shuffles,
22346ArrayRef<unsigned> Indices,unsigned Factor) const{
22347assert(Indices.size() == Shuffles.size());
22348
22349IRBuilder<> Builder(LI);
22350
22351auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22352if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22353 LI->getPointerAddressSpace(),
22354 LI->getDataLayout()))
22355returnfalse;
22356
22357auto *XLenTy =Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22358
22359// If the segment load is going to be performed one segment at a time
22360// anyway and only one of the fields is used, use a strided load instead.
22361// This will be equally fast, and create less vector register pressure.
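// For illustration: using only field 1 of a factor-4 interleave of i32
// elements becomes a strided load with a 16-byte stride from base + 4.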
22362if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22363unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22364Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22365Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22366Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(),Offset);
22367Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22368Value *VL = Builder.getInt32(VTy->getNumElements());
22369
22370CallInst *CI =
22371 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22372 {VTy, BasePtr->getType(), Stride->getType()},
22373 {BasePtr, Stride, Mask, VL});
22374 CI->addParamAttr(
22375 0,Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
22376 Shuffles[0]->replaceAllUsesWith(CI);
22377returntrue;
22378 };
22379
22380Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22381
22382CallInst *VlsegN = Builder.CreateIntrinsic(
22383FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22384 {LI->getPointerOperand(), VL});
22385
22386for (unsigned i = 0; i < Shuffles.size(); i++) {
22387Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22388 Shuffles[i]->replaceAllUsesWith(SubVec);
22389 }
22390
22391returntrue;
22392}
22393
22394staticconstIntrinsic::IDFixedVssegIntrIds[] = {
22395 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22396 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22397 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22398 Intrinsic::riscv_seg8_store};
22399
22400/// Lower an interleaved store into a vssegN intrinsic.
22401///
22402/// E.g. Lower an interleaved store (Factor = 3):
22403/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22404/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22405/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22406///
22407/// Into:
22408/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22409/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22410/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22411/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22412/// %ptr, i32 4)
22413///
22414/// Note that the new shufflevectors will be removed and we'll only generate one
22415/// vsseg3 instruction in CodeGen.
22416boolRISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
22417ShuffleVectorInst *SVI,
22418unsigned Factor) const{
22419IRBuilder<> Builder(SI);
22420auto Mask = SVI->getShuffleMask();
22421auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22422// Given SVI : <n*factor x ty>, then VTy : <n x ty>
22423auto *VTy =FixedVectorType::get(ShuffleVTy->getElementType(),
22424 ShuffleVTy->getNumElements() / Factor);
22425if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22426 SI->getPointerAddressSpace(),
22427 SI->getDataLayout()))
22428returnfalse;
22429
22430auto *XLenTy =Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22431
22432unsigned Index;
22433// If the segment store only has one active lane (i.e. the interleave is
22434// just a spread shuffle), we can use a strided store instead. This will
22435// be equally fast, and create less vector register pressure.
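// For illustration: a factor-4 spread of i32 data into lane index 2
// becomes a strided store with a 16-byte stride starting at base + 8.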
22436if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22437isSpreadMask(Mask, Factor, Index)) {
22438unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22439Value *Data = SVI->getOperand(0);
22440auto *DataVTy = cast<FixedVectorType>(Data->getType());
22441Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22442Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22443Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(),Offset);
22444Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22445Value *VL = Builder.getInt32(VTy->getNumElements());
22446
22447CallInst *CI = Builder.CreateIntrinsic(
22448 Intrinsic::experimental_vp_strided_store,
22449 {Data->getType(), BasePtr->getType(), Stride->getType()},
22450 {Data, BasePtr, Stride, Mask, VL});
22451 CI->addParamAttr(
22452 1,Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22453
22454returntrue;
22455 }
22456
22457Function *VssegNFunc =Intrinsic::getOrInsertDeclaration(
22458 SI->getModule(),FixedVssegIntrIds[Factor - 2],
22459 {VTy, SI->getPointerOperandType(), XLenTy});
22460
22461SmallVector<Value *, 10> Ops;
22462
22463for (unsigned i = 0; i < Factor; i++) {
22464Value *Shuffle = Builder.CreateShuffleVector(
22465 SVI->getOperand(0), SVI->getOperand(1),
22466createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22467 Ops.push_back(Shuffle);
22468 }
22469// This VL should be OK (should be executable in one vsseg instruction,
22470// potentially under larger LMULs) because we checked that the fixed vector
22471// type fits in isLegalInterleavedAccessType
22472Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22473 Ops.append({SI->getPointerOperand(), VL});
22474
22475 Builder.CreateCall(VssegNFunc, Ops);
22476
22477returntrue;
22478}
22479
22480boolRISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
22481LoadInst *LI,ArrayRef<Value *> DeinterleaveValues) const{
22482unsigned Factor = DeinterleaveValues.size();
22483if (Factor > 8)
22484returnfalse;
22485
22486assert(LI->isSimple());
22487IRBuilder<> Builder(LI);
22488
22489auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
22490
22491constDataLayout &DL = LI->getDataLayout();
22492
22493if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22494 LI->getPointerAddressSpace(),DL))
22495returnfalse;
22496
22497Value *Return;
22498Type *XLenTy =Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22499
22500if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22501Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22502 Return =
22503 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22504 {ResVTy, LI->getPointerOperandType(), XLenTy},
22505 {LI->getPointerOperand(), VL});
22506 }else {
22507staticconstIntrinsic::ID IntrIds[] = {
22508 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22509 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22510 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22511 Intrinsic::riscv_vlseg8};
22512
22513unsigned SEW =DL.getTypeSizeInBits(ResVTy->getElementType());
22514unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22515Type *VecTupTy =TargetExtType::get(
22516 LI->getContext(),"riscv.vector.tuple",
22517ScalableVectorType::get(Type::getInt8Ty(LI->getContext()),
22518 NumElts * SEW / 8),
22519 Factor);
22520
22521Value *VL =Constant::getAllOnesValue(XLenTy);
22522
22523Value *Vlseg = Builder.CreateIntrinsic(
22524 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22525 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22526 ConstantInt::get(XLenTy,Log2_64(SEW))});
22527
22528SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22529 Return =PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22530for (unsigned i = 0; i < Factor; ++i) {
22531Value *VecExtract = Builder.CreateIntrinsic(
22532 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22533 {Vlseg, Builder.getInt32(i)});
22534 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22535 }
22536 }
22537
22538for (auto [Idx, DIV] :enumerate(DeinterleaveValues)) {
22539// We have to create a brand new ExtractValue to replace each
22540// of these old ExtractValue instructions.
22541Value *NewEV =
22542 Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
22543 DIV->replaceAllUsesWith(NewEV);
22544 }
22545
22546returntrue;
22547}
22548
22549boolRISCVTargetLowering::lowerInterleaveIntrinsicToStore(
22550StoreInst *SI,ArrayRef<Value *> InterleaveValues) const{
22551unsigned Factor = InterleaveValues.size();
22552if (Factor > 8)
22553returnfalse;
22554
22555assert(SI->isSimple());
22556IRBuilder<> Builder(SI);
22557
22558auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
22559constDataLayout &DL = SI->getDataLayout();
22560
22561if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22562 SI->getPointerAddressSpace(),DL))
22563returnfalse;
22564
22565Type *XLenTy =Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22566
22567if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22568Function *VssegNFunc =Intrinsic::getOrInsertDeclaration(
22569 SI->getModule(),FixedVssegIntrIds[Factor - 2],
22570 {InVTy, SI->getPointerOperandType(), XLenTy});
22571
22572SmallVector<Value *, 10> Ops(InterleaveValues.begin(),
22573 InterleaveValues.end());
22574Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22575 Ops.append({SI->getPointerOperand(), VL});
22576
22577 Builder.CreateCall(VssegNFunc, Ops);
22578 }else {
22579staticconstIntrinsic::ID IntrIds[] = {
22580 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22581 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22582 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22583 Intrinsic::riscv_vsseg8};
22584
22585unsigned SEW =DL.getTypeSizeInBits(InVTy->getElementType());
22586unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22587Type *VecTupTy =TargetExtType::get(
22588 SI->getContext(),"riscv.vector.tuple",
22589ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22590 NumElts * SEW / 8),
22591 Factor);
22592
22593Function *VssegNFunc =Intrinsic::getOrInsertDeclaration(
22594 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22595
22596Value *VL =Constant::getAllOnesValue(XLenTy);
22597
22598Value *StoredVal =PoisonValue::get(VecTupTy);
22599for (unsigned i = 0; i < Factor; ++i)
22600 StoredVal = Builder.CreateIntrinsic(
22601 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22602 {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
22603
22604 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22605 ConstantInt::get(XLenTy,Log2_64(SEW))});
22606 }
22607
22608returntrue;
22609}
22610
22611MachineInstr *
22612RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
22613MachineBasicBlock::instr_iterator &MBBI,
22614constTargetInstrInfo *TII) const{
22615assert(MBBI->isCall() &&MBBI->getCFIType() &&
22616"Invalid call instruction for a KCFI check");
22617assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22618MBBI->getOpcode()));
22619
22620MachineOperand &Target =MBBI->getOperand(0);
22621Target.setIsRenamable(false);
22622
22623returnBuildMI(MBB,MBBI,MBBI->getDebugLoc(),TII->get(RISCV::KCFI_CHECK))
22624 .addReg(Target.getReg())
22625 .addImm(MBBI->getCFIType())
22626 .getInstr();
22627}
22628
22629#define GET_REGISTER_MATCHER
22630#include "RISCVGenAsmMatcher.inc"
22631
22632Register
22633RISCVTargetLowering::getRegisterByName(constchar *RegName,LLT VT,
22634constMachineFunction &MF) const{
22635Register Reg =MatchRegisterAltName(RegName);
22636if (Reg == RISCV::NoRegister)
22637 Reg =MatchRegisterName(RegName);
22638if (Reg == RISCV::NoRegister)
22639report_fatal_error(
22640Twine("Invalid register name \"" +StringRef(RegName) +"\"."));
22641BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22642if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22643report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22644StringRef(RegName) +"\"."));
22645return Reg;
22646}
22647
22648MachineMemOperand::Flags
22649RISCVTargetLowering::getTargetMMOFlags(constInstruction &I) const{
22650constMDNode *NontemporalInfo =I.getMetadata(LLVMContext::MD_nontemporal);
22651
22652if (NontemporalInfo ==nullptr)
22653returnMachineMemOperand::MONone;
22654
22655// 1 is the default value and works as __RISCV_NTLH_ALL
22656// 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22657// 3 -> __RISCV_NTLH_ALL_PRIVATE
22658// 4 -> __RISCV_NTLH_INNERMOST_SHARED
22659// 5 -> __RISCV_NTLH_ALL
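// For illustration: the default level 5 maps to 3 after the subtraction
// below, setting both MONontemporalBit0 and MONontemporalBit1, while
// level 2 maps to 0 and sets neither bit.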
22660int NontemporalLevel = 5;
22661constMDNode *RISCVNontemporalInfo =
22662I.getMetadata("riscv-nontemporal-domain");
22663if (RISCVNontemporalInfo !=nullptr)
22664 NontemporalLevel =
22665 cast<ConstantInt>(
22666 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22667 ->getValue())
22668 ->getZExtValue();
22669
22670assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22671"RISC-V target doesn't support this non-temporal domain.");
22672
22673 NontemporalLevel -= 2;
22674MachineMemOperand::Flags Flags =MachineMemOperand::MONone;
22675if (NontemporalLevel & 0b1)
22676 Flags |=MONontemporalBit0;
22677if (NontemporalLevel & 0b10)
22678 Flags |=MONontemporalBit1;
22679
22680return Flags;
22681}
22682
22683MachineMemOperand::Flags
22684RISCVTargetLowering::getTargetMMOFlags(constMemSDNode &Node) const{
22685
22686MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22687MachineMemOperand::Flags TargetFlags =MachineMemOperand::MONone;
22688 TargetFlags |= (NodeFlags &MONontemporalBit0);
22689 TargetFlags |= (NodeFlags &MONontemporalBit1);
22690return TargetFlags;
22691}
22692
22693boolRISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
22694constMemSDNode &NodeX,constMemSDNode &NodeY) const{
22695returngetTargetMMOFlags(NodeX) ==getTargetMMOFlags(NodeY);
22696}
22697
22698boolRISCVTargetLowering::isCtpopFast(EVT VT) const{
22699if (VT.isScalableVector())
22700returnisTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22701if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22702returntrue;
22703return Subtarget.hasStdExtZbb() &&
22704 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22705}
22706
22707unsignedRISCVTargetLowering::getCustomCtpopCost(EVT VT,
22708ISD::CondCodeCond) const{
22709returnisCtpopFast(VT) ? 0 : 1;
22710}
22711
22712boolRISCVTargetLowering::shouldInsertFencesForAtomic(
22713constInstruction *I) const{
22714if (Subtarget.hasStdExtZalasr()) {
22715if (Subtarget.hasStdExtZtso()) {
22716// Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22717// should be lowered to plain load/store. The easiest way to do this is
22718// to say we should insert fences for them, and the fence insertion code
22719// will just not insert any fences
22720auto *LI = dyn_cast<LoadInst>(I);
22721auto *SI = dyn_cast<StoreInst>(I);
22722if ((LI &&
22723 (LI->getOrdering() ==AtomicOrdering::SequentiallyConsistent)) ||
22724 (SI &&
22725 (SI->getOrdering() ==AtomicOrdering::SequentiallyConsistent))) {
22726// Here, this is a load or store which is seq_cst and needs a .aq or
22727// .rl, so we shouldn't try to insert fences.
22728returnfalse;
22729 }
22730// Here, we are a TSO inst that isn't a seq_cst load/store
22731return isa<LoadInst>(I) || isa<StoreInst>(I);
22732 }
22733returnfalse;
22734 }
22735// Note that one specific case requires fence insertion for an
22736// AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22737// than this hook due to limitations in the interface here.
22738return isa<LoadInst>(I) || isa<StoreInst>(I);
22739}
22740
22741boolRISCVTargetLowering::fallBackToDAGISel(constInstruction &Inst) const{
22742
22743// GISel support is in progress or complete for these opcodes.
22744unsignedOp = Inst.getOpcode();
22745if (Op == Instruction::Add ||Op == Instruction::Sub ||
22746Op == Instruction::And ||Op == Instruction::Or ||
22747Op == Instruction::Xor ||Op == Instruction::InsertElement ||
22748Op == Instruction::ShuffleVector ||Op == Instruction::Load ||
22749Op == Instruction::Freeze ||Op == Instruction::Store)
22750returnfalse;
22751
22752if (Inst.getType()->isScalableTy())
22753returntrue;
22754
22755for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22756if (Inst.getOperand(i)->getType()->isScalableTy() &&
22757 !isa<ReturnInst>(&Inst))
22758returntrue;
22759
22760if (constAllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22761if (AI->getAllocatedType()->isScalableTy())
22762returntrue;
22763 }
22764
22765returnfalse;
22766}
22767
22768SDValue
22769RISCVTargetLowering::BuildSDIVPow2(SDNode *N,constAPInt &Divisor,
22770SelectionDAG &DAG,
22771SmallVectorImpl<SDNode *> &Created) const{
22772AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
22773if (isIntDivCheap(N->getValueType(0), Attr))
22774returnSDValue(N, 0);// Lower SDIV as SDIV
22775
22776// Only perform this transform if short forward branch opt is supported.
22777if (!Subtarget.hasShortForwardBranchOpt())
22778returnSDValue();
22779EVT VT =N->getValueType(0);
22780if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22781returnSDValue();
22782
22783// Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
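// For illustration: x / 1024 only needs the bias 1023, which fits in a
// 12-bit immediate, while x / 4096 would need a bias of 4095, which does
// not, so it is rejected here.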
22784if (Divisor.sgt(2048) || Divisor.slt(-2048))
22785returnSDValue();
22786returnTargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22787}
22788
22789bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22790EVT VT,constAPInt &AndMask) const{
22791if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22792return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22793returnTargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
22794}
22795
22796unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const{
22797return Subtarget.getMinimumJumpTableEntries();
22798}
22799
22800SDValueRISCVTargetLowering::expandIndirectJTBranch(constSDLoc &dl,
22801SDValueValue,SDValueAddr,
22802int JTI,
22803SelectionDAG &DAG) const{
22804if (Subtarget.hasStdExtZicfilp()) {
22805// When Zicfilp is enabled, we need to use a software-guarded branch for
22806// the jump table branch.
22807SDValue Chain =Value;
22808// Jump table debug info is only needed if CodeView is enabled.
22809if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
22810 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22811return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain,Addr);
22812 }
22813returnTargetLowering::expandIndirectJTBranch(dl,Value,Addr, JTI, DAG);
22814}
22815
22816// If an output pattern produces multiple instructions, tablegen may pick an
22817// arbitrary type from an instruction's destination register class to use for
22818// the VT of that MachineSDNode. This VT may be used to look up the
22819// representative register class. If the type isn't legal, the default
22820// implementation will not find a register class.
22821//
22822// Some integer types smaller than XLen are listed in the GPR register class
22823// to support isel patterns for GISel, but are not legal in SelectionDAG. The
22824// arbitrary type tablegen picks may be one of these smaller types.
22825//
22826// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22827// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22828std::pair<const TargetRegisterClass *, uint8_t>
22829RISCVTargetLowering::findRepresentativeClass(constTargetRegisterInfo *TRI,
22830MVT VT) const{
22831switch (VT.SimpleTy) {
22832default:
22833break;
22834case MVT::i8:
22835case MVT::i16:
22836case MVT::i32:
22837returnTargetLowering::findRepresentativeClass(TRI, Subtarget.getXLenVT());
22838case MVT::bf16:
22839case MVT::f16:
22840returnTargetLowering::findRepresentativeClass(TRI, MVT::f32);
22841 }
22842
22843returnTargetLowering::findRepresentativeClass(TRI, VT);
22844}
22845
22846namespacellvm::RISCVVIntrinsicsTable {
22847
22848#define GET_RISCVVIntrinsicsTable_IMPL
22849#include "RISCVGenSearchableTables.inc"
22850
22851}// namespace llvm::RISCVVIntrinsicsTable
22852
22853boolRISCVTargetLowering::hasInlineStackProbe(constMachineFunction &MF) const{
22854
22855// If the function specifically requests inline stack probes, emit them.
22856if (MF.getFunction().hasFnAttribute("probe-stack"))
22857return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22858"inline-asm";
22859
22860returnfalse;
22861}
22862
22863unsignedRISCVTargetLowering::getStackProbeSize(constMachineFunction &MF,
22864Align StackAlign) const{
22865// The default stack probe size is 4096 if the function has no
22866// stack-probe-size attribute.
22867constFunction &Fn = MF.getFunction();
22868unsigned StackProbeSize =
22869 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22870// Round down to the stack alignment.
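// For illustration: a requested "stack-probe-size" of 5000 with 16-byte
// stack alignment is rounded down to 4992.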
22871 StackProbeSize =alignDown(StackProbeSize, StackAlign.value());
22872return StackProbeSize ? StackProbeSize : StackAlign.value();
22873}
22874
22875SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValueOp,
22876SelectionDAG &DAG) const{
22877MachineFunction &MF = DAG.getMachineFunction();
22878if (!hasInlineStackProbe(MF))
22879returnSDValue();
22880
22881MVT XLenVT = Subtarget.getXLenVT();
22882// Get the inputs.
22883SDValue Chain =Op.getOperand(0);
22884SDValueSize =Op.getOperand(1);
22885
22886MaybeAlignAlign =
22887 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
22888SDLoc dl(Op);
22889EVT VT =Op.getValueType();
22890
22891// Construct the new SP value in a GPR.
22892SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
22893 Chain = SP.getValue(1);
22894 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP,Size);
22895if (Align)
22896 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
22897 DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
22898
22899// Set the real SP to the new value with a probing loop.
22900 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
22901return DAG.getMergeValues({SP, Chain}, dl);
22902}
22903
22904MachineBasicBlock *
22905RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
22906MachineBasicBlock *MBB) const{
22907MachineFunction &MF = *MBB->getParent();
22908MachineBasicBlock::iteratorMBBI =MI.getIterator();
22909DebugLocDL =MBB->findDebugLoc(MBBI);
22910Register TargetReg =MI.getOperand(1).getReg();
22911
22912constRISCVInstrInfo *TII = Subtarget.getInstrInfo();
22913bool IsRV64 = Subtarget.is64Bit();
22914Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
22915constRISCVTargetLowering *TLI = Subtarget.getTargetLowering();
22916uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
22917
22918MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
22919MachineBasicBlock *LoopTestMBB =
22920 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
22921 MF.insert(MBBInsertPoint, LoopTestMBB);
22922MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
22923 MF.insert(MBBInsertPoint, ExitMBB);
22924RegisterSPReg = RISCV::X2;
22925Register ScratchReg =
22926 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
22927
22928// ScratchReg = ProbeSize
22929TII->movImm(*MBB,MBBI,DL, ScratchReg, ProbeSize,MachineInstr::NoFlags);
22930
22931// LoopTest:
22932// SUB SP, SP, ProbeSize
22933BuildMI(*LoopTestMBB, LoopTestMBB->end(),DL,TII->get(RISCV::SUB),SPReg)
22934 .addReg(SPReg)
22935 .addReg(ScratchReg);
22936
22937// s[d|w] zero, 0(sp)
22938BuildMI(*LoopTestMBB, LoopTestMBB->end(),DL,
22939TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
22940 .addReg(RISCV::X0)
22941 .addReg(SPReg)
22942 .addImm(0);
22943
22944// BLT TargetReg, SP, LoopTest
22945BuildMI(*LoopTestMBB, LoopTestMBB->end(),DL,TII->get(RISCV::BLT))
22946 .addReg(TargetReg)
22947 .addReg(SPReg)
22948 .addMBB(LoopTestMBB);
22949
22950// Adjust with: MV SP, TargetReg.
22951BuildMI(*ExitMBB, ExitMBB->end(),DL,TII->get(RISCV::ADDI),SPReg)
22952 .addReg(TargetReg)
22953 .addImm(0);
22954
22955 ExitMBB->splice(ExitMBB->end(),MBB, std::next(MBBI),MBB->end());
22956 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
22957
22958 LoopTestMBB->addSuccessor(ExitMBB);
22959 LoopTestMBB->addSuccessor(LoopTestMBB);
22960MBB->addSuccessor(LoopTestMBB);
22961
22962MI.eraseFromParent();
22963 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
22964return ExitMBB->begin()->getParent();
22965}
hasMaskOp
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
Definition:RISCVISelLowering.cpp:6527
legalizeScatterGatherIndexType
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
Definition:RISCVISelLowering.cpp:17336
combineSelectToBinOp
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:8197
isSpreadMask
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Definition:RISCVISelLowering.cpp:4870
getRISCVVLOp
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
Definition:RISCVISelLowering.cpp:6357
getVecReduceOpcode
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
Definition:RISCVISelLowering.cpp:13605
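The binop-to-reduction mapping the brief describes can be sketched as a plain switch over generic ISD opcodes; this is an illustrative stand-in, not the in-tree table:
#include "llvm/CodeGen/ISDOpcodes.h"
// Map a scalar binary opcode to the generic VECREDUCE opcode with the same
// associative semantics; returns 0 when no reduction counterpart applies.
static unsigned vecReduceOpcodeFor(unsigned Opc) {
  switch (Opc) {
  case llvm::ISD::ADD:  return llvm::ISD::VECREDUCE_ADD;
  case llvm::ISD::AND:  return llvm::ISD::VECREDUCE_AND;
  case llvm::ISD::OR:   return llvm::ISD::VECREDUCE_OR;
  case llvm::ISD::XOR:  return llvm::ISD::VECREDUCE_XOR;
  case llvm::ISD::SMAX: return llvm::ISD::VECREDUCE_SMAX;
  case llvm::ISD::SMIN: return llvm::ISD::VECREDUCE_SMIN;
  case llvm::ISD::UMAX: return llvm::ISD::VECREDUCE_UMAX;
  case llvm::ISD::UMIN: return llvm::ISD::VECREDUCE_UMIN;
  case llvm::ISD::FADD: return llvm::ISD::VECREDUCE_FADD;
  default:              return 0;
  }
}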
getDefaultVLOps
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:2765
isPromotedOpNeedingSplit
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:6545
performFP_TO_INT_SATCombine
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16175
lowerReductionSeq
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
Definition:RISCVISelLowering.cpp:10317
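For intuition, the form "scalar = reduce_op vec, scalar_start" is simply a fold of the reduction operator over the vector elements, seeded with the start value. A hypothetical plain-C++ analogue for an add reduction (not code from this file):
#include <cstdint>
#include <vector>
// scalar_start seeds the accumulator; reduce_op folds in every element.
static int64_t reduceAddSeq(const std::vector<int64_t> &Vec, int64_t Start) {
  int64_t Acc = Start;
  for (int64_t V : Vec)
    Acc += V;
  return Acc;
}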
performVP_REVERSECombine
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16255
lowerGetVectorLength
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:9611
getDefaultScalableVLOps
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:2756
getVLOperand
static SDValue getVLOperand(SDValue Op)
Definition:RISCVISelLowering.cpp:2580
performVECTOR_SHUFFLECombine
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
Definition:RISCVISelLowering.cpp:17206
performVP_STORECombine
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16317
emitFROUND
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:19696
getLargeExternalSymbol
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:7924
lowerCttzElts
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:9646
lowerVectorIntrinsicScalars
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:9430
performSIGN_EXTEND_INREGCombine
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:14948
lowerVectorXRINT
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:3315
ExtensionMaxWebSize
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
getDeinterleaveShiftAndTrunc
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:4642
combineBinOpOfZExt
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:14049
matchSelectAddSub
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
Definition:RISCVISelLowering.cpp:16880
isSelectPseudo
static bool isSelectPseudo(MachineInstr &MI)
Definition:RISCVISelLowering.cpp:19315
getSmallestVTForIndex
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:9065
useRVVForFixedLengthVectorVT
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:2593
useTpOffset
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
Definition:RISCVISelLowering.cpp:22237
combineAddOfBooleanXor
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:14091
combineTruncOfSraSext
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:17451
getSingleShuffleSrc
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
Definition:RISCVISelLowering.cpp:4496
emitSplitF64Pseudo
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:19243
emitVFROUND_NOEXCEPT_MASK
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
Definition:RISCVISelLowering.cpp:19633
SplitVectorOp
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:6556
negateFMAOpcode
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
Definition:RISCVISelLowering.cpp:16379
lowerScalarInsert
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:4442
transformAddShlImm
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:13839
tryFoldSelectIntoOp
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
Definition:RISCVISelLowering.cpp:16740
VP_CASE
#define VP_CASE(NODE)
lowerBitreverseShuffle
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:5017
lowerConstant
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:5988
matchIndexAsShuffle
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
Definition:RISCVISelLowering.cpp:17369
performVFMADD_VLCombine
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16456
combineBinOpToReduce
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:13733
SplitVPOp
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:6581
lowerBUILD_VECTOR
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:4065
processVCIXOperands
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:9705
widenVectorOpsToi8
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:10944
lowerINT_TO_FP
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:2879
lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:3079
lowerFTRUNC_FCEIL_FFLOOR_FROUND
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:3287
isSimpleVIDSequence
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
Definition:RISCVISelLowering.cpp:3412
getVSlideup
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
Definition:RISCVISelLowering.cpp:3354
computeGREVOrGORC
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
Definition:RISCVISelLowering.cpp:18870
lowerVECTOR_SHUFFLEAsRotate
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:5092
matchRoundingOp
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
Definition:RISCVISelLowering.cpp:3044
lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:3186
combineTruncSelectToSMaxUSat
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:14319
performBITREVERSECombine
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16238
transformAddImmMulImm
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:13989
combineSubOfBoolean
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:14139
matchSplatAsGather
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:3518
isValidEGW
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:9728
combine_CC
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16647
isNonZeroAVL
static bool isNonZeroAVL(SDValue AVL)
Definition:RISCVISelLowering.cpp:10308
DEBUG_TYPE
#define DEBUG_TYPE
Definition:RISCVISelLowering.cpp:53
lowerFP_TO_INT
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:3019
lowerVECTOR_SHUFFLEAsVSlideup
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:4752
getVCIXISDNodeWCHAIN
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
Definition:RISCVISelLowering.cpp:9969
getLargeGlobalAddress
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:7914
emitReadCounterWidePseudo
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
Definition:RISCVISelLowering.cpp:19178
getWideningSpread
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:4899
AllowSplatInVW_W
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
unpackF64OnRV32DSoftABI
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
Definition:RISCVISelLowering.cpp:20117
tryMemPairCombine
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
Definition:RISCVISelLowering.cpp:15933
getRVVFPReductionOpAndOperands
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:10400
performFP_TO_INTCombine
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:16076
combineBinOpOfExtractToReduceTree
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
Definition:RISCVISelLowering.cpp:13638
lowerBuildVectorViaPacking
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
Definition:RISCVISelLowering.cpp:4008
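The packing trick pairs adjacent narrow lanes into one double-width lane, so roughly half as many vslide1down steps are needed. A hypothetical scalar sketch of packing two 32-bit lanes into a 64-bit lane, assuming the lower-numbered lane lands in the low half:
#include <cstdint>
// Pack two adjacent 32-bit build_vector lanes into one 64-bit lane.
static uint64_t packLanes(uint32_t Lane0, uint32_t Lane1) {
  return (static_cast<uint64_t>(Lane1) << 32) | Lane0;
}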
performTRUNCATECombine
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:14383
lowerBuildVectorViaDominantValues
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
Definition:RISCVISelLowering.cpp:3581
isCompressMask
static bool isCompressMask(ArrayRef< int > Mask)
Definition:RISCVISelLowering.cpp:5248
translateSetCCForBranch
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
Definition:RISCVISelLowering.cpp:2297
combineToVWMACC
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:17264
performBUILD_VECTORCombine
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
Definition:RISCVISelLowering.cpp:16950
OP_CASE
#define OP_CASE(NODE)
FixedVssegIntrIds
static const Intrinsic::ID FixedVssegIntrIds[]
Definition:RISCVISelLowering.cpp:22394
getVSlidedown
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
Definition:RISCVISelLowering.cpp:3342
RISCVISelLowering.h
getMaskTypeFor
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
Definition:RISCVLegalizerInfo.cpp:952
getRISCVWOpcode
static unsigned getRISCVWOpcode(unsigned Opcode)
Definition:RISCVLegalizerInfo.cpp:1287
RISCVMachineFunctionInfo.h
RISCVMatInt.h
Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition:RISCVRedundantCopyElimination.cpp:75
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
RISCVRegisterInfo.h
RISCVSelectionDAGInfo.h
RISCVSubtarget.h
RISCV.h
ROTR
#define ROTR(x, n)
Definition:SHA256.cpp:32
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
getValueType
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
Definition:SLPVectorizer.cpp:243
isCommutative
static bool isCommutative(Instruction *I)
Definition:SLPVectorizer.cpp:509
SelectionDAGAddressAnalysis.h
ROTL
#define ROTL(x, b)
Definition:SipHash.cpp:32
SmallSet.h
This file defines the SmallSet class.
Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition:Statistic.h:166
getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition:TapiFile.cpp:39
Ptr
@ Ptr
Definition:TargetLibraryInfo.cpp:77
TargetLoweringObjectFileImpl.h
UndefPoisonKind::PoisonOnly
@ PoisonOnly
ValueTypes.h
VectorUtils.h
Concat
static constexpr int Concat[]
Definition:X86InterleavedAccess.cpp:232
RHS
Value * RHS
Definition:X86PartialReduction.cpp:74
LHS
Value * LHS
Definition:X86PartialReduction.cpp:73
T
llvm::APFloat
Definition:APFloat.h:904
llvm::APFloat::convertFromAPInt
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition:APFloat.h:1334
llvm::APFloat::convertToInteger
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition:APFloat.h:1326
llvm::APFloat::getNaN
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition:APFloat.h:1111
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::APInt::getSignMask
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition:APInt.h:229
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition:APInt.h:1520
llvm::APInt::setBitsFrom
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition:APInt.h:1386
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition:APInt.h:1492
llvm::APInt::trunc
APInt trunc(unsigned width) const
Truncate to new width.
Definition:APInt.cpp:910
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set to 1 the bit at the position given by "bitPosition".
Definition:APInt.h:1330
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition:APInt.h:1201
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition:APInt.h:371
llvm::APInt::ugt
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition:APInt.h:1182
llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition:APInt.h:380
llvm::APInt::getSignedMaxValue
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition:APInt.h:209
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition:APInt.h:329
llvm::APInt::sdiv
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition:APInt.cpp:1618
llvm::APInt::clearAllBits
void clearAllBits()
Set every bit to 0.
Definition:APInt.h:1397
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition:APInt.h:435
llvm::APInt::getSplat
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition:APInt.cpp:624
llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition:APInt.h:219
llvm::APInt::getSignificantBits
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition:APInt.h:1511
llvm::APInt::insertBits
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition:APInt.cpp:370
llvm::APInt::srem
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition:APInt.cpp:1710
llvm::APInt::isMask
bool isMask(unsigned numBits) const
Definition:APInt.h:488
llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition:APInt.h:334
llvm::APInt::sext
APInt sext(unsigned width) const
Sign extend to a new width.
Definition:APInt.cpp:959
llvm::APInt::isSubsetOf
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition:APInt.h:1257
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition:APInt.h:440
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition:APInt.h:306
llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition:APInt.h:1130
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition:APInt.h:296
llvm::APInt::setLowBits
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition:APInt.h:1389
llvm::APInt::extractBits
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition:APInt.cpp:455
llvm::APInt::getBitsSetFrom
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition:APInt.h:286
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition:APInt.h:239
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition:APInt.h:1542
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition:APInt.h:1221
llvm::APSInt
An arbitrary precision integer that knows its signedness.
Definition:APSInt.h:23
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition:Instructions.h:63
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition:Argument.h:31
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::ArrayRef::end
iterator end() const
Definition:ArrayRef.h:157
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition:ArrayRef.h:168
llvm::ArrayRef::begin
iterator begin() const
Definition:ArrayRef.h:156
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition:ArrayRef.h:198
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition:Instructions.h:501
llvm::AtomicCmpXchgInst::getCompareOperand
Value * getCompareOperand()
Definition:Instructions.h:633
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition:Instructions.h:704
llvm::AtomicRMWInst::getAlign
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition:Instructions.h:827
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition:Instructions.h:716
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition:Instructions.h:720
llvm::AtomicRMWInst::USubCond
@ USubCond
Subtract only if no unsigned overflow.
Definition:Instructions.h:764
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition:Instructions.h:734
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition:Instructions.h:728
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition:Instructions.h:722
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition:Instructions.h:724
llvm::AtomicRMWInst::USubSat
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition:Instructions.h:768
llvm::AtomicRMWInst::UIncWrap
@ UIncWrap
Increment one up to a maximum value.
Definition:Instructions.h:756
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition:Instructions.h:732
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition:Instructions.h:738
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition:Instructions.h:736
llvm::AtomicRMWInst::UDecWrap
@ UDecWrap
Decrement one until a minimum value or zero.
Definition:Instructions.h:760
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition:Instructions.h:718
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition:Instructions.h:726
llvm::AtomicRMWInst::isFloatingPointOperation
bool isFloatingPointOperation() const
Definition:Instructions.h:882
llvm::AtomicRMWInst::getOperation
BinOp getOperation() const
Definition:Instructions.h:805
llvm::AtomicRMWInst::getValOperand
Value * getValOperand()
Definition:Instructions.h:874
llvm::AtomicRMWInst::getOrdering
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition:Instructions.h:847
llvm::AttributeList
Definition:Attributes.h:490
llvm::AttributeList::hasFnAttr
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
Definition:Attributes.cpp:1877
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition:Attributes.cpp:392
llvm::Attribute::getWithAlignment
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition:Attributes.cpp:234
llvm::BaseIndexOffset::match
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
Definition:SelectionDAGAddressAnalysis.cpp:301
llvm::BasicBlock
LLVM Basic Block Representation.
Definition:BasicBlock.h:61
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition:BasicBlock.cpp:292
llvm::BitVector
Definition:BitVector.h:82
llvm::BitVector::test
bool test(unsigned Idx) const
Definition:BitVector.h:461
llvm::BitVector::set
BitVector & set()
Definition:BitVector.h:351
llvm::BitVector::all
bool all() const
all - Returns true if all bits are set.
Definition:BitVector.h:175
llvm::BlockAddressSDNode
Definition:SelectionDAGNodes.h:2314
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition:CallingConvLower.h:170
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition:CallingConvLower.h:315
llvm::CCState::AnalyzeCallOperands
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
Definition:CallingConvLower.cpp:126
llvm::CCState::getStackSize
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
Definition:CallingConvLower.h:245
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition:CallingConvLower.cpp:85
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition:CallingConvLower.h:33
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition:CallingConvLower.h:122
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition:CallingConvLower.h:128
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition:CallingConvLower.h:134
llvm::CCValAssign::BCvt
@ BCvt
Definition:CallingConvLower.h:46
llvm::CCValAssign::Full
@ Full
Definition:CallingConvLower.h:36
llvm::CCValAssign::Indirect
@ Indirect
Definition:CallingConvLower.h:52
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition:CallingConvLower.h:126
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition:CallingConvLower.h:120
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition:CallingConvLower.h:123
llvm::CCValAssign::getLocMemOffset
int64_t getLocMemOffset() const
Definition:CallingConvLower.h:129
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition:CallingConvLower.h:132
llvm::CallBase::isMustTailCall
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition:Instructions.cpp:343
llvm::CallBase::isIndirectCall
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Definition:Instructions.cpp:334
llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition:InstrTypes.h:1494
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition:Instructions.h:1479
llvm::CallInst::isTailCall
bool isTailCall() const
Definition:Instructions.h:1589
llvm::ConstantFPSDNode
Definition:SelectionDAGNodes.h:1739
llvm::ConstantFPSDNode::isExactlyValue
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition:SelectionDAGNodes.h:1775
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition:Constants.h:83
llvm::ConstantInt::isMinusOne
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition:Constants.h:220
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition:Constants.h:208
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition:Constants.h:157
llvm::ConstantPoolSDNode
Definition:SelectionDAGNodes.h:2002
llvm::ConstantSDNode
Definition:SelectionDAGNodes.h:1684
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition:SelectionDAGNodes.h:1701
llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition:SelectionDAGNodes.h:1700
llvm::ConstantSDNode::isOpaque
bool isOpaque() const
Definition:SelectionDAGNodes.h:1715
llvm::ConstantSDNode::isZero
bool isZero() const
Definition:SelectionDAGNodes.h:1710
llvm::Constant
This is an important base class in LLVM.
Definition:Constant.h:42
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition:Constants.cpp:420
llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition:DWARFExpression.h:32
llvm::DWARFExpression::Operation::getNumOperands
uint64_t getNumOperands() const
Definition:DWARFExpression.h:90
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition:DataLayout.h:63
llvm::DataLayout::getPointerSizeInBits
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition:DataLayout.h:364
llvm::DataLayout::getPrefTypeAlign
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition:DataLayout.cpp:847
llvm::DebugLoc
A debug info location.
Definition:DebugLoc.h:33
llvm::DemandedBits
Definition:DemandedBits.h:40
llvm::DenseMapBase::size
unsigned size() const
Definition:DenseMap.h:99
llvm::DenseMapBase::at
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition:DenseMap.h:202
llvm::DenseMapBase::contains
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition:DenseMap.h:147
llvm::DenseMap
Definition:DenseMap.h:727
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition:DenseSet.h:278
llvm::DiagnosticInfoUnsupported
Diagnostic information for unsupported feature in backend.
Definition:DiagnosticInfo.h:1097
llvm::ElementCount
Definition:TypeSize.h:300
llvm::ElementCount::getScalable
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition:TypeSize.h:314
llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition:TypeSize.h:311
llvm::ExternalSymbolSDNode
Definition:SelectionDAGNodes.h:2356
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition:Type.cpp:791
llvm::FunctionType
Class to represent function types.
Definition:DerivedTypes.h:105
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition:DerivedTypes.h:137
llvm::FunctionType::getReturnType
Type * getReturnType() const
Definition:DerivedTypes.h:126
llvm::Function
Definition:Function.h:63
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition:Function.h:216
llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition:Function.cpp:766
llvm::Function::getFnAttributeAsParsedInteger
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition:Function.cpp:778
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition:Function.h:704
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition:Function.h:277
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition:Function.h:353
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition:Function.cpp:369
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition:Function.h:886
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition:Function.cpp:731
llvm::GISelAddressing::BaseIndexOffset
Helper struct to store a base, index and offset that forms an address.
Definition:LoadStoreOpt.h:38
llvm::GISelAddressing::BaseIndexOffset::getOffset
int64_t getOffset() const
Definition:LoadStoreOpt.h:54
llvm::GlobalAddressSDNode
Definition:SelectionDAGNodes.h:1876
llvm::GlobalValue
Definition:GlobalValue.h:48
llvm::GlobalValue::isDSOLocal
bool isDSOLocal() const
Definition:GlobalValue.h:306
llvm::GlobalValue::hasExternalWeakLinkage
bool hasExternalWeakLinkage() const
Definition:GlobalValue.h:530
llvm::HexagonInstrInfo::storeRegToStackSlot
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
Definition:HexagonInstrInfo.cpp:962
llvm::HexagonInstrInfo::loadRegFromStackSlot
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Definition:HexagonInstrInfo.cpp:1011
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition:IRBuilder.h:113
llvm::IRBuilderBase::CreateConstGEP1_32
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition:IRBuilder.h:1887
llvm::IRBuilderBase::CreateInsertValue
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition:IRBuilder.h:2562
llvm::IRBuilderBase::CreateExtractValue
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition:IRBuilder.h:2555
llvm::IRBuilderBase::CreateFence
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition:IRBuilder.h:1842
llvm::IRBuilderBase::CreateSExt
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition:IRBuilder.h:2045
llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition:IRBuilder.h:545
llvm::IRBuilderBase::CreatePtrAdd
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition:IRBuilder.h:1987
llvm::IRBuilderBase::GetInsertBlock
BasicBlock * GetInsertBlock() const
Definition:IRBuilder.h:193
llvm::IRBuilderBase::getInt64Ty
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition:IRBuilder.h:550
llvm::IRBuilderBase::getAllOnesMask
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition:IRBuilder.h:867
llvm::IRBuilderBase::CreateIntrinsic
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition:IRBuilder.cpp:900
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition:IRBuilder.h:505
llvm::IRBuilderBase::CreateNot
Value * CreateNot(Value *V, const Twine &Name="")
Definition:IRBuilder.h:1757
llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition:IRBuilder.h:1387
llvm::IRBuilderBase::getIntN
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition:IRBuilder.h:516
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition:IRBuilder.h:2533
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition:IRBuilder.h:2449
llvm::IRBuilderBase::CreateAtomicRMW
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition:IRBuilder.h:1862
llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition:IRBuilder.h:2019
llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition:IRBuilder.h:535
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition:IRBuilder.h:2705
llvm::InlineAsm::ConstraintCode
ConstraintCode
Definition:InlineAsm.h:239
llvm::InlineAsm::ConstraintCode::A
@ A
llvm::InstructionCost
Definition:InstructionCost.h:29
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition:InstructionCost.h:73
llvm::Instruction
Definition:Instruction.h:68
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition:Instruction.cpp:68
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition:Instruction.h:291
llvm::Instruction::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition:Instruction.cpp:76
llvm::IntegerType
Class to represent integer types.
Definition:DerivedTypes.h:42
llvm::JumpTableSDNode
Definition:SelectionDAGNodes.h:1981
llvm::LLT
Definition:LowLevelType.h:39
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition:LLVMContext.h:67
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition:LLVMContext.cpp:245
llvm::LSBaseSDNode
Base class for LoadSDNode and StoreSDNode.
Definition:SelectionDAGNodes.h:2431
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition:SelectionDAGNodes.h:2452
llvm::LoadInst
An instruction for reading from memory.
Definition:Instructions.h:176
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition:Instructions.h:261
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition:Instructions.h:255
llvm::LoadInst::getPointerOperandType
Type * getPointerOperandType() const
Definition:Instructions.h:258
llvm::LoadInst::isSimple
bool isSimple() const
Definition:Instructions.h:247
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition:Instructions.h:211
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition:SelectionDAGNodes.h:2464
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition:SelectionDAGNodes.h:2483
llvm::LocationSize::beforeOrAfterPointer
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Definition:MemoryLocation.h:137
llvm::MCContext
Context object for machine code objects.
Definition:MCContext.h:83
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition:MCExpr.h:34
llvm::MCInstBuilder
Definition:MCInstBuilder.h:21
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition:MCInst.h:185
llvm::MCObjectFileInfo::getContext
MCContext & getContext() const
Definition:MCObjectFileInfo.h:252
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition:MCSubtargetInfo.h:76
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition:MCExpr.h:398
llvm::MDNode
Metadata node.
Definition:Metadata.h:1073
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition:Metadata.h:1434
llvm::MVT
Machine Value Type.
Definition:MachineValueType.h:35
llvm::MVT::getFloatingPointVT
static MVT getFloatingPointVT(unsigned BitWidth)
Definition:MachineValueType.h:431
llvm::MVT::SimpleValueType
SimpleValueType
Definition:MachineValueType.h:37
llvm::MVT::integer_fixedlen_vector_valuetypes
static auto integer_fixedlen_vector_valuetypes()
Definition:MachineValueType.h:554
llvm::MVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition:MachineValueType.h:277
llvm::MVT::isRISCVVectorTuple
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
Definition:MachineValueType.h:120
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition:MachineValueType.h:55
llvm::MVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition:MachineValueType.h:346
llvm::MVT::changeVectorElementType
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition:MachineValueType.h:207
llvm::MVT::bitsLE
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
Definition:MachineValueType.h:425
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition:MachineValueType.h:294
llvm::MVT::getRISCVVectorTupleVT
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
Definition:MachineValueType.h:471
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition:MachineValueType.h:106
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition:MachineValueType.h:90
llvm::MVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
Definition:MachineValueType.h:113
llvm::MVT::getScalableVectorVT
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
Definition:MachineValueType.h:461
llvm::MVT::getRISCVVectorTupleNumFields
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
Definition:MachineValueType.h:482
llvm::MVT::changeTypeToInteger
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition:MachineValueType.h:217
llvm::MVT::getVT
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition:ValueTypes.cpp:237
llvm::MVT::bitsLT
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
Definition:MachineValueType.h:418
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition:MachineValueType.h:308
llvm::MVT::isPow2VectorType
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition:MachineValueType.h:241
llvm::MVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition:MachineValueType.h:342
llvm::MVT::getFltSemantics
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition:ValueTypes.cpp:307
llvm::MVT::bitsGT
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
Definition:MachineValueType.h:404
llvm::MVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition:MachineValueType.h:135
llvm::MVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition:MachineValueType.h:290
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition:MachineValueType.h:356
llvm::MVT::bitsGE
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
Definition:MachineValueType.h:411
llvm::MVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
Definition:MachineValueType.h:100
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition:MachineValueType.h:451
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition:MachineValueType.h:263
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition:MachineValueType.h:80
llvm::MVT::isValid
bool isValid() const
Return true if this is a valid simple valuetype.
Definition:MachineValueType.h:74
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition:MachineValueType.h:441
llvm::MVT::getDoubleNumVectorElementsVT
MVT getDoubleNumVectorElementsVT() const
Definition:MachineValueType.h:234
llvm::MVT::getHalfNumVectorElementsVT
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
Definition:MachineValueType.h:225
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition:MachineValueType.h:259
llvm::MVT::integer_scalable_vector_valuetypes
static auto integer_scalable_vector_valuetypes()
Definition:MachineValueType.h:566
llvm::MVT::changeVectorElementTypeToInteger
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition:MachineValueType.h:196
llvm::MVT::fp_fixedlen_vector_valuetypes
static auto fp_fixedlen_vector_valuetypes()
Definition:MachineValueType.h:560
llvm::MachineBasicBlock
Definition:MachineBasicBlock.h:125
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
Definition:MachineBasicBlock.cpp:937
llvm::MachineBasicBlock::getSymbol
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
Definition:MachineBasicBlock.cpp:63
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition:MachineBasicBlock.h:1002
llvm::MachineBasicBlock::setCallFrameSize
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
Definition:MachineBasicBlock.h:1223
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition:MachineBasicBlock.h:256
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition:MachineBasicBlock.cpp:798
llvm::MachineBasicBlock::begin
iterator begin()
Definition:MachineBasicBlock.h:355
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Definition:MachineBasicBlock.cpp:1516
llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition:MachineBasicBlock.h:314
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition:MachineBasicBlock.h:341
llvm::MachineBasicBlock::end
iterator end()
Definition:MachineBasicBlock.h:357
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition:MachineBasicBlock.h:311
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition:MachineBasicBlock.h:1109
llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition:MachineBasicBlock.h:319
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition:MachineFrameInfo.h:106
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition:MachineFrameInfo.cpp:83
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition:MachineFrameInfo.cpp:51
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition:MachineFrameInfo.h:374
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition:MachineFrameInfo.h:647
llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition:MachineFrameInfo.h:380
llvm::MachineFunctionProperties::Property::NoPHIs
@ NoPHIs
llvm::MachineFunction
Definition:MachineFunction.h:267
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition:MachineFunction.h:733
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition:MachineFunction.cpp:536
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition:MachineFunction.h:749
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition:MachineFunction.h:743
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition:MachineFunction.cpp:309
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition:MachineFunction.h:704
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition:MachineFunction.h:831
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition:MachineFunction.cpp:762
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition:MachineFunction.cpp:499
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition:MachineFunction.h:966
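The MachineFunction accessors above are the usual entry points into per-function state; the following hedged sketch only shows how they are reached (the helper name is made up).
// Illustrative only: per-function state reached through a MachineFunction.
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

static void queryFunctionState(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();   // stack frame layout
  MachineRegisterInfo &MRI = MF.getRegInfo();  // virtual/physical registers
  const DataLayout &DL = MF.getDataLayout();   // module data layout
  // Target-private per-function state, e.g. the varargs frame index.
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  (void)MFI; (void)MRI; (void)DL; (void)RVFI;
}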
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition:MachineInstrBuilder.h:133
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition:MachineInstrBuilder.h:226
llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition:MachineInstrBuilder.h:154
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition:MachineInstrBuilder.h:99
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition:MachineInstrBuilder.h:148
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition:MachineInstrBuilder.h:204
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition:MachineInstrBuilder.h:91
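A hedged sketch of how the MachineInstrBuilder methods above chain together under BuildMI; the opcode descriptor, registers, frame index and memory operand are placeholders, not drawn from this file.
// Illustrative only: chaining MachineInstrBuilder operand methods.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

static void emitExample(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const MCInstrDesc &Desc,
                        Register Dst, Register Src, int FrameIdx,
                        MachineMemOperand *MMO) {
  BuildMI(MBB, I, DL, Desc, Dst) // Dst is the defined register
      .addReg(Src)               // register use
      .addFrameIndex(FrameIdx)   // frame index, resolved after frame lowering
      .addImm(0)                 // immediate offset
      .addMemOperand(MMO);       // describes the memory access, if any
}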
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MachineInstr
Representation of each machine instruction.
Definition:MachineInstr.h:71
llvm::MachineInstr::collectDebugValues
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
Definition:MachineInstr.cpp:2464
llvm::MachineInstr::NoFPExcept
@ NoFPExcept
Definition:MachineInstr.h:113
llvm::MachineInstr::NoFlags
@ NoFlags
Definition:MachineInstr.h:86
llvm::MachineInstr::setFlag
void setFlag(MIFlag Flag)
Set an MI flag.
Definition:MachineInstr.h:406
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition:MachineInstr.cpp:767
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition:MachineInstr.h:587
llvm::MachineJumpTableInfo
Definition:MachineJumpTableInfo.h:46
llvm::MachineJumpTableInfo::EK_Custom32
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
Definition:MachineJumpTableInfo.h:86
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition:MachineMemOperand.h:129
llvm::MachineMemOperand::getRanges
const MDNode * getRanges() const
Return the range tag for the memory reference.
Definition:MachineMemOperand.h:269
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition:MachineMemOperand.h:132
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition:MachineMemOperand.h:140
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition:MachineMemOperand.h:144
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition:MachineMemOperand.h:136
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition:MachineMemOperand.h:142
llvm::MachineMemOperand::MONone
@ MONone
Definition:MachineMemOperand.h:134
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition:MachineMemOperand.h:146
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition:MachineMemOperand.h:138
llvm::MachineMemOperand::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition:MachineMemOperand.h:204
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition:MachineMemOperand.h:224
llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition:MachineMemOperand.h:266
llvm::MachineMemOperand::getBaseAlign
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
Definition:MachineMemOperand.h:263
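A minimal sketch of allocating a MachineMemOperand with the flags listed above, assuming the access size is a power of two; the helper is hypothetical.
// Illustrative only: allocate an MMO describing a load from a fixed stack slot.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

static MachineMemOperand *makeStackLoadMMO(MachineFunction &MF, int FI,
                                           uint64_t SizeInBytes) {
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                 SizeInBytes, Align(SizeInBytes));
}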
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition:MachineOperand.h:48
llvm::MachineOperand::getImm
int64_t getImm() const
Definition:MachineOperand.h:556
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition:MachineOperand.h:820
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition:MachineOperand.h:369
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition:MachineOperand.h:838
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition:MachineRegisterInfo.h:51
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition:MachineRegisterInfo.cpp:156
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition:MachineRegisterInfo.h:1006
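A hedged sketch of the explicit two-step live-in registration that the MachineRegisterInfo entries above describe; names are placeholders, and MachineFunction::addLiveIn is the one-call shorthand.
// Illustrative only: create a vreg and record a live-in physical register.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static Register addLiveInExplicit(MachineFunction &MF, MCRegister PhysReg,
                                  const TargetRegisterClass *RC) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // Create a fresh virtual register of the requested class ...
  Register VReg = MRI.createVirtualRegister(RC);
  // ... and record that PhysReg enters the function carrying its value.
  MRI.addLiveIn(PhysReg, VReg);
  return VReg;
}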
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition:SelectionDAGNodes.h:1352
llvm::MemSDNode::isSimple
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Definition:SelectionDAGNodes.h:1429
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition:SelectionDAGNodes.h:1436
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition:SelectionDAGNodes.h:1455
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition:SelectionDAGNodes.h:1432
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition:MemoryLocation.h:232
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition:Module.h:65
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition:Constants.cpp:1878
llvm::RISCVConstantPoolValue
A RISCV-specific constant pool value.
Definition:RISCVConstantPoolValue.h:28
llvm::RISCVConstantPoolValue::Create
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
Definition:RISCVConstantPoolValue.cpp:29
llvm::RISCVInstrInfo
Definition:RISCVInstrInfo.h:62
llvm::RISCVMachineFunctionInfo
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
Definition:RISCVMachineFunctionInfo.h:47
llvm::RISCVMachineFunctionInfo::setVarArgsFrameIndex
void setVarArgsFrameIndex(int Index)
Definition:RISCVMachineFunctionInfo.h:93
llvm::RISCVMachineFunctionInfo::getVarArgsFrameIndex
int getVarArgsFrameIndex() const
Definition:RISCVMachineFunctionInfo.h:92
llvm::RISCVMachineFunctionInfo::setVarArgsSaveSize
void setVarArgsSaveSize(int Size)
Definition:RISCVMachineFunctionInfo.h:96
llvm::RISCVMachineFunctionInfo::addSExt32Register
void addSExt32Register(Register Reg)
Definition:RISCVMachineFunctionInfo.cpp:69
llvm::RISCVSubtarget
Definition:RISCVSubtarget.h:78
llvm::RISCVSubtarget::getTargetABI
RISCVABI::ABI getTargetABI() const
Definition:RISCVSubtarget.h:233
llvm::RISCVSubtarget::getMinimumJumpTableEntries
unsigned getMinimumJumpTableEntries() const
Definition:RISCVSubtarget.cpp:214
llvm::RISCVSubtarget::hasStdExtCOrZca
bool hasStdExtCOrZca() const
Definition:RISCVSubtarget.h:162
llvm::RISCVSubtarget::getMaxLMULForFixedLengthVectors
unsigned getMaxLMULForFixedLengthVectors() const
Definition:RISCVSubtarget.cpp:190
llvm::RISCVSubtarget::hasVInstructionsI64
bool hasVInstructionsI64() const
Definition:RISCVSubtarget.h:249
llvm::RISCVSubtarget::hasVInstructionsF64
bool hasVInstructionsF64() const
Definition:RISCVSubtarget.h:254
llvm::RISCVSubtarget::getMaxStoresPerMemcpy
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Definition:RISCVSubtarget.h:366
llvm::RISCVSubtarget::hasStdExtDOrZdinx
bool hasStdExtDOrZdinx() const
Definition:RISCVSubtarget.h:169
llvm::RISCVSubtarget::getMaxLoadsPerMemcmp
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
Definition:RISCVSubtarget.h:376
llvm::RISCVSubtarget::hasStdExtZfhOrZhinx
bool hasStdExtZfhOrZhinx() const
Definition:RISCVSubtarget.h:170
llvm::RISCVSubtarget::getRealMinVLen
unsigned getRealMinVLen() const
Definition:RISCVSubtarget.h:206
llvm::RISCVSubtarget::getMaxStoresPerMemset
unsigned getMaxStoresPerMemset(bool OptSize) const
Definition:RISCVSubtarget.h:357
llvm::RISCVSubtarget::expandVScale
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
Definition:RISCVSubtarget.h:225
llvm::RISCVSubtarget::useRVVForFixedLengthVectors
bool useRVVForFixedLengthVectors() const
Definition:RISCVSubtarget.cpp:199
llvm::RISCVSubtarget::isTargetFuchsia
bool isTargetFuchsia() const
Definition:RISCVSubtarget.h:316
llvm::RISCVSubtarget::hasVInstructionsBF16Minimal
bool hasVInstructionsBF16Minimal() const
Definition:RISCVSubtarget.h:252
llvm::RISCVSubtarget::getDLenFactor
unsigned getDLenFactor() const
Definition:RISCVSubtarget.h:285
llvm::RISCVSubtarget::getXLenVT
MVT getXLenVT() const
Definition:RISCVSubtarget.h:185
llvm::RISCVSubtarget::getMaxStoresPerMemmove
unsigned getMaxStoresPerMemmove(bool OptSize) const
Definition:RISCVSubtarget.h:371
llvm::RISCVSubtarget::hasVInstructionsF16Minimal
bool hasVInstructionsF16Minimal() const
Definition:RISCVSubtarget.h:250
llvm::RISCVSubtarget::getMaxGluedStoresPerMemcpy
unsigned getMaxGluedStoresPerMemcpy() const
Definition:RISCVSubtarget.h:362
llvm::RISCVSubtarget::getXLen
unsigned getXLen() const
Definition:RISCVSubtarget.h:188
llvm::RISCVSubtarget::hasConditionalMoveFusion
bool hasConditionalMoveFusion() const
Definition:RISCVSubtarget.h:178
llvm::RISCVSubtarget::hasVInstructionsF16
bool hasVInstructionsF16() const
Definition:RISCVSubtarget.h:251
llvm::RISCVSubtarget::getMaxBuildIntsCost
unsigned getMaxBuildIntsCost() const
Definition:RISCVSubtarget.cpp:150
llvm::RISCVSubtarget::useCCMovInsn
bool useCCMovInsn() const
Definition:RISCVSubtarget.cpp:251
llvm::RISCVSubtarget::getPrefLoopAlignment
Align getPrefLoopAlignment() const
Definition:RISCVSubtarget.h:148
llvm::RISCVSubtarget::hasVInstructions
bool hasVInstructions() const
Definition:RISCVSubtarget.h:248
llvm::RISCVSubtarget::isRegisterReservedByUser
bool isRegisterReservedByUser(Register i) const override
Definition:RISCVSubtarget.h:239
llvm::RISCVSubtarget::getRealVLen
std::optional< unsigned > getRealVLen() const
Definition:RISCVSubtarget.h:215
llvm::RISCVSubtarget::hasOptimizedSegmentLoadStore
bool hasOptimizedSegmentLoadStore(unsigned NF) const
Definition:RISCVSubtarget.h:262
llvm::RISCVSubtarget::useConstantPoolForLargeInts
bool useConstantPoolForLargeInts() const
Definition:RISCVSubtarget.cpp:146
llvm::RISCVSubtarget::getPrefFunctionAlignment
Align getPrefFunctionAlignment() const
Definition:RISCVSubtarget.h:145
llvm::RISCVSubtarget::hasStdExtZfhminOrZhinxmin
bool hasStdExtZfhminOrZhinxmin() const
Definition:RISCVSubtarget.h:171
llvm::RISCVSubtarget::getRealMaxVLen
unsigned getRealMaxVLen() const
Definition:RISCVSubtarget.h:210
llvm::RISCVSubtarget::getRegisterInfo
const RISCVRegisterInfo * getRegisterInfo() const override
Definition:RISCVSubtarget.h:134
llvm::RISCVSubtarget::getInstrInfo
const RISCVInstrInfo * getInstrInfo() const override
Definition:RISCVSubtarget.h:133
llvm::RISCVSubtarget::getTargetLowering
const RISCVTargetLowering * getTargetLowering() const override
Definition:RISCVSubtarget.h:137
llvm::RISCVSubtarget::hasVInstructionsF32
bool hasVInstructionsF32() const
Definition:RISCVSubtarget.h:253
llvm::RISCVSubtarget::getELen
unsigned getELen() const
Definition:RISCVSubtarget.h:202
llvm::RISCVSubtarget::isTargetAndroid
bool isTargetAndroid() const
Definition:RISCVSubtarget.h:315
llvm::RISCVSubtarget::hasStdExtFOrZfinx
bool hasStdExtFOrZfinx() const
Definition:RISCVSubtarget.h:168
llvm::RISCVSubtarget::isSoftFPABI
bool isSoftFPABI() const
Definition:RISCVSubtarget.h:234
llvm::RISCVSubtarget::getFrameLowering
const RISCVFrameLowering * getFrameLowering() const override
Definition:RISCVSubtarget.h:130
llvm::RISCVSubtarget::getFLen
unsigned getFLen() const
Definition:RISCVSubtarget.h:193
llvm::RISCVSubtarget::is64Bit
bool is64Bit() const
Definition:RISCVSubtarget.h:184
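A hedged sketch of how the RISCVSubtarget feature queries above typically gate vector lowering; the predicate and the 128-bit threshold are illustrative, not taken from this file.
// Illustrative only: subtarget feature queries gating vector lowering.
#include "RISCVSubtarget.h"
using namespace llvm;

static bool canUseRVVForVectorsSketch(const RISCVSubtarget &ST) {
  // RVV lowering needs the V extension, and fixed-length vectors additionally
  // require the subtarget to opt in to using RVV for them.
  if (!ST.hasVInstructions() || !ST.useRVVForFixedLengthVectors())
    return false;
  // getRealMinVLen() is the guaranteed minimum VLEN; getXLenVT() is i32/i64.
  return ST.getRealMinVLen() >= 128 && ST.getXLenVT() == MVT::i64;
}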
llvm::RISCVTargetLowering
Definition:RISCVISelLowering.h:510
llvm::RISCVTargetLowering::computeVLMAXBounds
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
Definition:RISCVISelLowering.cpp:2795
llvm::RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
Definition:RISCVISelLowering.cpp:2498
llvm::RISCVTargetLowering::getVRGatherVVCost
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
Definition:RISCVISelLowering.cpp:2852
llvm::RISCVTargetLowering::getIndexedAddressParts
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
Definition:RISCVISelLowering.cpp:21763
llvm::RISCVTargetLowering::getSubregIndexByMVT
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Definition:RISCVISelLowering.cpp:2424
llvm::RISCVTargetLowering::getIRStackGuard
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Definition:RISCVISelLowering.cpp:22245
llvm::RISCVTargetLowering::shouldConvertFpToSat
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
Definition:RISCVISelLowering.cpp:21716
llvm::RISCVTargetLowering::getInlineAsmMemConstraint
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
Definition:RISCVISelLowering.cpp:21444
llvm::RISCVTargetLowering::LowerReturn
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
Definition:RISCVISelLowering.cpp:20720
llvm::RISCVTargetLowering::shouldFoldSelectWithIdentityConstant
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
Definition:RISCVISelLowering.cpp:2050
llvm::RISCVTargetLowering::mayBeEmittedAsTailCall
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Definition:RISCVISelLowering.cpp:20873
llvm::RISCVTargetLowering::RISCVTargetLowering
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
Definition:RISCVISelLowering.cpp:81
llvm::RISCVTargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition:RISCVISelLowering.cpp:19829
llvm::RISCVTargetLowering::emitLeadingFence
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
Definition:RISCVISelLowering.cpp:21498
llvm::RISCVTargetLowering::isTruncateFree
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition:RISCVISelLowering.cpp:1944
llvm::RISCVTargetLowering::lowerInterleaveIntrinsicToStore
bool lowerInterleaveIntrinsicToStore(StoreInst *SI, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
Definition:RISCVISelLowering.cpp:22549
llvm::RISCVTargetLowering::shouldRemoveExtendFromGSIndex
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Definition:RISCVISelLowering.cpp:21706
llvm::RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
Definition:RISCVISelLowering.cpp:21613
llvm::RISCVTargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Definition:RISCVISelLowering.cpp:22012
llvm::RISCVTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Definition:RISCVISelLowering.cpp:21985
llvm::RISCVTargetLowering::getTargetConstantFromLoad
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
Definition:RISCVISelLowering.cpp:19134
llvm::RISCVTargetLowering::getSubtarget
const RISCVSubtarget & getSubtarget() const
Definition:RISCVISelLowering.h:517
llvm::RISCVTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition:RISCVISelLowering.cpp:17676
llvm::RISCVTargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition:RISCVISelLowering.cpp:2138
llvm::RISCVTargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition:RISCVISelLowering.cpp:18889
llvm::RISCVTargetLowering::preferScalarizeSplat
bool preferScalarizeSplat(SDNode *N) const override
Definition:RISCVISelLowering.cpp:22228
llvm::RISCVTargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Definition:RISCVISelLowering.cpp:20877
llvm::RISCVTargetLowering::shouldExtendTypeInLibCall
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Definition:RISCVISelLowering.cpp:21905
llvm::RISCVTargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition:RISCVISelLowering.cpp:1935
llvm::RISCVTargetLowering::shouldSignExtendTypeInLibCall
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Definition:RISCVISelLowering.cpp:21915
llvm::RISCVTargetLowering::LowerCustomJumpTableEntry
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
Definition:RISCVISelLowering.cpp:21743
llvm::RISCVTargetLowering::getVRGatherVICost
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
Definition:RISCVISelLowering.cpp:2859
llvm::RISCVTargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
Definition:RISCVISelLowering.cpp:2062
llvm::RISCVTargetLowering::targetShrinkDemandedConstant
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
Definition:RISCVISelLowering.cpp:18784
llvm::RISCVTargetLowering::shouldExpandBuildVectorWithShuffles
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
Definition:RISCVISelLowering.cpp:2821
llvm::RISCVTargetLowering::getRegisterTypeForCallingConv
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
Definition:RISCVISelLowering.cpp:2247
llvm::RISCVTargetLowering::decomposeMulByConstant
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Definition:RISCVISelLowering.cpp:21923
llvm::RISCVTargetLowering::CanLowerReturn
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
Definition:RISCVISelLowering.cpp:20702
llvm::RISCVTargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition:RISCVISelLowering.cpp:1897
llvm::RISCVTargetLowering::hasAndNotCompare
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
Definition:RISCVISelLowering.cpp:2027
llvm::RISCVTargetLowering::shouldScalarizeBinop
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
Definition:RISCVISelLowering.cpp:2116
llvm::RISCVTargetLowering::isDesirableToCommuteWithShift
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Definition:RISCVISelLowering.cpp:18696
llvm::RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
Definition:RISCVISelLowering.cpp:22693
llvm::RISCVTargetLowering::hasBitTest
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
Definition:RISCVISelLowering.cpp:2038
llvm::RISCVTargetLowering::computeVLMAX
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
Definition:RISCVISelLowering.h:827
llvm::RISCVTargetLowering::shouldExpandCttzElements
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
Definition:RISCVISelLowering.cpp:1616
llvm::RISCVTargetLowering::isCheapToSpeculateCtlz
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition:RISCVISelLowering.cpp:2006
llvm::RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
Definition:RISCVISelLowering.cpp:21686
llvm::RISCVTargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Definition:RISCVISelLowering.cpp:2169
llvm::RISCVTargetLowering::getLMULCost
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
Definition:RISCVISelLowering.cpp:2826
llvm::RISCVTargetLowering::getJumpTableEncoding
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
Definition:RISCVISelLowering.cpp:21733
llvm::RISCVTargetLowering::isMulAddWithConstProfitable
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
Definition:RISCVISelLowering.cpp:21962
llvm::RISCVTargetLowering::getVSlideVICost
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
Definition:RISCVISelLowering.cpp:2875
llvm::RISCVTargetLowering::fallBackToDAGISel
bool fallBackToDAGISel(const Instruction &Inst) const override
Definition:RISCVISelLowering.cpp:22741
llvm::RISCVTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition:RISCVISelLowering.cpp:1574
llvm::RISCVTargetLowering::lowerInterleavedLoad
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
Definition:RISCVISelLowering.cpp:22344
llvm::RISCVTargetLowering::isCtpopFast
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
Definition:RISCVISelLowering.cpp:22698
llvm::RISCVTargetLowering::ComputeNumSignBitsForTargetNode
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
Definition:RISCVISelLowering.cpp:19030
llvm::RISCVTargetLowering::getContainerForFixedLengthVector
MVT getContainerForFixedLengthVector(MVT VT) const
Definition:RISCVISelLowering.cpp:2710
llvm::RISCVTargetLowering::getRegClassIDForVecVT
static unsigned getRegClassIDForVecVT(MVT VT)
Definition:RISCVISelLowering.cpp:2447
llvm::RISCVTargetLowering::getExceptionPointerRegister
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
Definition:RISCVISelLowering.cpp:21895
llvm::RISCVTargetLowering::shouldExpandAtomicRMWInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition:RISCVISelLowering.cpp:21532
llvm::RISCVTargetLowering::isExtractSubvectorCheap
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
Definition:RISCVISelLowering.cpp:2208
llvm::RISCVTargetLowering::getRegForInlineAsmConstraint
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Definition:RISCVISelLowering.cpp:21176
llvm::RISCVTargetLowering::emitDynamicProbedAlloc
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition:RISCVISelLowering.cpp:22905
llvm::RISCVTargetLowering::getTargetMMOFlags
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
Definition:RISCVISelLowering.cpp:22649
llvm::RISCVTargetLowering::computeVLMax
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
Definition:RISCVISelLowering.cpp:2787
llvm::RISCVTargetLowering::signExtendConstant
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
Definition:RISCVISelLowering.cpp:1997
llvm::RISCVTargetLowering::shouldTransformSignedTruncationCheck
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
Definition:RISCVISelLowering.cpp:18677
llvm::RISCVTargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Definition:RISCVISelLowering.cpp:2093
llvm::RISCVTargetLowering::hasInlineStackProbe
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Definition:RISCVISelLowering.cpp:22853
llvm::RISCVTargetLowering::getRegisterByName
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
Definition:RISCVISelLowering.cpp:22633
llvm::RISCVTargetLowering::getVSlideVXCost
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
Definition:RISCVISelLowering.cpp:2867
llvm::RISCVTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition:RISCVISelLowering.cpp:6662
llvm::RISCVTargetLowering::getRegClassIDForLMUL
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
Definition:RISCVISelLowering.cpp:2406
llvm::RISCVTargetLowering::isUsedByReturnOnly
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
Definition:RISCVISelLowering.cpp:20836
llvm::RISCVTargetLowering::isFMAFasterThanFMulAndFAdd
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
Definition:RISCVISelLowering.cpp:21868
llvm::RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition:RISCVISelLowering.cpp:21673
llvm::RISCVTargetLowering::getExceptionSelectorRegister
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Definition:RISCVISelLowering.cpp:21900
llvm::RISCVTargetLowering::getCustomCtpopCost
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
Definition:RISCVISelLowering.cpp:22707
llvm::RISCVTargetLowering::AdjustInstrPostInstrSelection
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
Definition:RISCVISelLowering.cpp:19926
llvm::RISCVTargetLowering::isShuffleMaskLegal
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
Definition:RISCVISelLowering.cpp:5751
llvm::RISCVTargetLowering::isCheapToSpeculateCttz
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition:RISCVISelLowering.cpp:2001
llvm::RISCVTargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition:RISCVISelLowering.cpp:1931
llvm::RISCVTargetLowering::getExtendForAtomicCmpSwapArg
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
Definition:RISCVISelLowering.cpp:21890
llvm::RISCVTargetLowering::lowerInterleavedStore
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
Definition:RISCVISelLowering.cpp:22416
llvm::RISCVTargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
Definition:RISCVISelLowering.cpp:20150
llvm::RISCVTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition:RISCVISelLowering.cpp:12898
llvm::RISCVTargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
Definition:RISCVISelLowering.cpp:1621
llvm::RISCVTargetLowering::getVectorTypeBreakdownForCallingConv
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Definition:RISCVISelLowering.cpp:2284
llvm::RISCVTargetLowering::isLegalElementTypeForRVV
bool isLegalElementTypeForRVV(EVT ScalarTy) const
Definition:RISCVISelLowering.cpp:2550
llvm::RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
Definition:RISCVISelLowering.cpp:21751
llvm::RISCVTargetLowering::getLMUL
static RISCVII::VLMUL getLMUL(MVT VT)
Definition:RISCVISelLowering.cpp:2359
llvm::RISCVTargetLowering::getLegalZfaFPImm
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
Definition:RISCVISelLowering.cpp:2149
llvm::RISCVTargetLowering::LowerAsmOperandForConstraint
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
Definition:RISCVISelLowering.cpp:21458
llvm::RISCVTargetLowering::splitValueIntoRegisterParts
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Definition:RISCVISelLowering.cpp:22059
llvm::RISCVTargetLowering::emitTrailingFence
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Definition:RISCVISelLowering.cpp:21514
llvm::RISCVTargetLowering::getNumRegistersForCallingConv
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
Definition:RISCVISelLowering.cpp:2272
llvm::RISCVTargetLowering::getConstraintType
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
Definition:RISCVISelLowering.cpp:21148
llvm::RISCVTargetLowering::EmitKCFICheck
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
Definition:RISCVISelLowering.cpp:22612
llvm::RISCVTargetLowering::isLegalInterleavedAccessType
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
Definition:RISCVISelLowering.cpp:22268
llvm::RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
Definition:RISCVISelLowering.cpp:19116
llvm::RISCVTargetLowering::isIntDivCheap
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
Definition:RISCVISelLowering.cpp:22220
llvm::RISCVTargetLowering::expandIndirectJTBranch
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
Definition:RISCVISelLowering.cpp:22800
llvm::RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, ArrayRef< Value * > DeinterleaveValues) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
Definition:RISCVISelLowering.cpp:22480
llvm::RISCVTargetLowering::getNumRegisters
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
Definition:RISCVISelLowering.cpp:2262
llvm::RISCVTargetLowering::getPostIndexedAddressParts
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
Definition:RISCVISelLowering.cpp:21821
llvm::RISCVTargetLowering::getPreIndexedAddressParts
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
Definition:RISCVISelLowering.cpp:21799
llvm::RISCVTargetLowering::joinRegisterPartsIntoValue
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
Definition:RISCVISelLowering.cpp:22153
llvm::RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
Definition:RISCVISelLowering.cpp:2011
llvm::RISCVTargetLowering::isSExtCheaperThanZExt
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
Definition:RISCVISelLowering.cpp:1993
llvm::RISCVTargetLowering::isLegalStridedLoadStore
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a strided load/store of the given result type and alignment is legal.
Definition:RISCVISelLowering.cpp:22306
llvm::RISCVTargetLowering::LowerCall
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
Definition:RISCVISelLowering.cpp:20383
llvm::RISCVTargetLowering::isZExtFree
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Definition:RISCVISelLowering.cpp:1978
llvm::RISCVTargetLowering::getStackProbeSize
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
Definition:RISCVISelLowering.cpp:22863
llvm::RISCVTargetLowering::shouldInsertFencesForAtomic
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Definition:RISCVISelLowering.cpp:22712
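To illustrate the general shape of the immediate-legality hooks listed above, here is a simplified, hypothetical stand-in using a 12-bit signed-immediate check in the style of ADDI; it is not this file's implementation.
// Illustrative only: a simplified stand-in for an immediate-legality hook.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

static bool isLegalAddImmediateSketch(int64_t Imm) {
  // ADDI-class instructions accept a sign-extended 12-bit immediate.
  return llvm::isInt<12>(Imm);
}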
llvm::Register
Wrapper class representing virtual and physical registers.
Definition:Register.h:19
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition:SelectionDAGNodes.h:1182
llvm::SDNode
Represents one node in the SelectionDAG.
Definition:SelectionDAGNodes.h:496
llvm::SDNode::ops
ArrayRef< SDUse > ops() const
Definition:SelectionDAGNodes.h:1001
llvm::SDNode::getAsAPIntVal
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
Definition:SelectionDAGNodes.h:1735
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition:SelectionDAGNodes.h:687
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition:SelectionDAGNodes.h:739
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition:SelectionDAGNodes.h:859
llvm::SDNode::getFlags
SDNodeFlags getFlags() const
Definition:SelectionDAGNodes.h:1043
llvm::SDNode::getSimpleValueType
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
Definition:SelectionDAGNodes.h:1068
llvm::SDNode::hasPredecessorHelper
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
Definition:SelectionDAGNodes.h:914
llvm::SDNode::getAsZExtVal
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Definition:SelectionDAGNodes.h:1727
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition:SelectionDAGNodes.h:992
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition:SelectionDAGNodes.h:1062
llvm::SDNode::setCFIType
void setCFIType(uint32_t Type)
Definition:SelectionDAGNodes.h:1055
llvm::SDNode::isUndef
bool isUndef() const
Return true if the node is an UNDEF value.
Definition:SelectionDAGNodes.h:694
llvm::SDNode::users
iterator_range< user_iterator > users()
Definition:SelectionDAGNodes.h:871
llvm::SDNode::op_end
op_iterator op_end() const
Definition:SelectionDAGNodes.h:1000
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition:SelectionDAGNodes.h:999
llvm::SDUse
Represents a use of a SDNode.
Definition:SelectionDAGNodes.h:283
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition:SelectionDAGNodes.h:145
llvm::SDValue::isUndef
bool isUndef() const
Definition:SelectionDAGNodes.h:1249
llvm::SDValue::getNode
SDNode * getNode() const
Get the SDNode which holds the desired result.
Definition:SelectionDAGNodes.h:159
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition:SelectionDAGNodes.h:1257
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition:SelectionDAGNodes.h:179
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition:SelectionDAGNodes.h:1217
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition:SelectionDAGNodes.h:199
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition:SelectionDAGNodes.h:1225
llvm::SDValue::getConstantOperandAPInt
const APInt & getConstantOperandAPInt(unsigned i) const
Definition:SelectionDAGNodes.h:1233
llvm::SDValue::getScalarValueSizeInBits
uint64_t getScalarValueSizeInBits() const
Definition:SelectionDAGNodes.h:203
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition:SelectionDAGNodes.h:1229
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition:SelectionDAGNodes.h:190
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition:SelectionDAGNodes.h:1213
llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition:SelectionDAGNodes.h:1221
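A minimal sketch of inspecting an SDValue with the accessors above; the matched shape (an ADD whose second operand is a small constant) is arbitrary.
// Illustrative only: inspecting an SDValue in a combine.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static bool isAddOfSmallConstant(SDValue V) {
  if (V.getOpcode() != ISD::ADD || !V.hasOneUse())
    return false;
  // Operand 1 must be a constant that fits in a signed 12-bit field.
  auto *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
  return C && isInt<12>(C->getSExtValue());
}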
llvm::ScalableVectorType::get
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition:Type.cpp:812
llvm::SelectionDAGTargetInfo::isTargetStrictFPOpcode
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
Definition:SelectionDAGTargetInfo.h:45
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition:SelectionDAG.h:228
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition:SelectionDAG.cpp:9287
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition:SelectionDAG.h:751
llvm::SelectionDAG::ComputeMaxSignificantBits
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
Definition:SelectionDAG.cpp:5417
llvm::SelectionDAG::getMaskedGather
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
Definition:SelectionDAG.cpp:10040
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition:SelectionDAG.h:802
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition:SelectionDAG.cpp:9034
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition:SelectionDAG.cpp:10708
llvm::SelectionDAG::getShiftAmountConstant
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
Definition:SelectionDAG.cpp:1811
llvm::SelectionDAG::getAllOnesConstant
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition:SelectionDAG.cpp:1800
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition:SelectionDAG.cpp:11149
llvm::SelectionDAG::getNeutralElement
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
Definition:SelectionDAG.cpp:13545
llvm::SelectionDAG::getVScale
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
Definition:SelectionDAG.cpp:2092
llvm::SelectionDAG::getFreeze
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
Definition:SelectionDAG.cpp:2462
llvm::SelectionDAG::getStridedLoadVP
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
Definition:SelectionDAG.cpp:9720
llvm::SelectionDAG::makeEquivalentMemoryOrdering
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
Definition:SelectionDAG.cpp:12152
llvm::SelectionDAG::getJumpTableDebugInfo
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
Definition:SelectionDAG.cpp:1961
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition:SelectionDAG.h:1251
llvm::SelectionDAG::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
Definition:SelectionDAG.h:2422
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition:SelectionDAG.cpp:1873
llvm::SelectionDAG::getRegister
SDValue getRegister(Register Reg, EVT VT)
Definition:SelectionDAG.cpp:2328
llvm::SelectionDAG::getElementCount
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
Definition:SelectionDAG.cpp:2111
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition:SelectionDAG.cpp:9270
llvm::SelectionDAG::getStepVector
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
Definition:SelectionDAG.cpp:2125
llvm::SelectionDAG::getMemcpy
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
Definition:SelectionDAG.cpp:8581
llvm::SelectionDAG::addNoMergeSiteInfo
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
Definition:SelectionDAG.h:2394
llvm::SelectionDAG::shouldOptForSize
bool shouldOptForSize() const
Definition:SelectionDAG.cpp:1401
llvm::SelectionDAG::SplitVectorOperand
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
Definition:SelectionDAG.h:2306
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition:SelectionDAG.cpp:1622
llvm::SelectionDAG::getVPZExtOrTrunc
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
Definition:SelectionDAG.cpp:1642
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition:SelectionDAG.h:503
llvm::SelectionDAG::getStridedStoreVP
SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
Definition:SelectionDAG.cpp:9773
llvm::SelectionDAG::NewNodesMustHaveLegalTypes
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition:SelectionDAG.h:397
llvm::SelectionDAG::GetSplitDestVTs
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
Definition:SelectionDAG.cpp:12961
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition:SelectionDAG.h:761
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition:SelectionDAG.h:1129
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition:SelectionDAG.h:1106
llvm::SelectionDAG::getGatherVP
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
Definition:SelectionDAG.cpp:9858
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition:SelectionDAG.h:857
llvm::SelectionDAG::isSplatValue
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
Definition:SelectionDAG.cpp:3029
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition:SelectionDAG.cpp:2433
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition:SelectionDAG.h:828
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition:SelectionDAG.h:1280
llvm::SelectionDAG::getNegative
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
Definition:SelectionDAG.cpp:1617
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition:SelectionDAG.cpp:10917
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition:SelectionDAG.cpp:1568
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition:SelectionDAG.h:497
llvm::SelectionDAG::getSelectionDAGInfo
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition:SelectionDAG.h:505
llvm::SelectionDAG::getStoreVP
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
Definition:SelectionDAG.cpp:9584
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition:SelectionDAG.cpp:1666
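For orientation, a minimal sketch of the getConstant/getNode pattern used throughout this lowering code; it assumes a SelectionDAG &DAG, an SDLoc DL, a value X, and an already-legal integer MVT VT are in scope, as they would be inside a lowering hook.
// Hypothetical helper: build (add X, 4) with an immediate of type VT.
static SDValue addImmFour(SelectionDAG &DAG, const SDLoc &DL, SDValue X, MVT VT) {
  SDValue Four = DAG.getConstant(4, DL, VT);       // ConstantSDNode wrapping 4
  return DAG.getNode(ISD::ADD, DL, VT, X, Four);   // generic node builder
}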
llvm::SelectionDAG::getMemBasePlusOffset
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
Definition:SelectionDAG.cpp:8052
llvm::SelectionDAG::getSignedTargetConstant
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition:SelectionDAG.h:713
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition:SelectionDAG.cpp:9371
llvm::SelectionDAG::ReplaceAllUsesWith
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition:SelectionDAG.cpp:11653
llvm::SelectionDAG::SplitVector
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
Definition:SelectionDAG.cpp:13006
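A hedged sketch of the usual split-in-half idiom built from GetSplitDestVTs and SplitVector, assuming DAG, DL, and a vector-typed SDValue Vec are already in scope:
// Compute the low/high result types, then split Vec via EXTRACT_SUBVECTOR.
auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Vec.getValueType());
auto [Lo, Hi] = DAG.SplitVector(Vec, DL, LoVT, HiVT);
// Lo and Hi can now be handled independently and recombined with CONCAT_VECTORS.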
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition:SelectionDAG.cpp:9320
llvm::SelectionDAG::getSignedConstant
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Definition:SelectionDAG.cpp:1794
llvm::SelectionDAG::getSplatVector
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition:SelectionDAG.h:891
llvm::SelectionDAG::getCALLSEQ_START
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition:SelectionDAG.h:1094
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition:SelectionDAG.cpp:2969
llvm::SelectionDAG::FoldConstantArithmetic
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
Definition:SelectionDAG.cpp:6672
llvm::SelectionDAG::getMaskedStore
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
Definition:SelectionDAG.cpp:9991
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition:SelectionDAG.cpp:2052
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition:SelectionDAG.h:498
llvm::SelectionDAG::getStrictFPExtendOrRound
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
Definition:SelectionDAG.cpp:1483
llvm::SelectionDAG::SplitEVL
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
Definition:SelectionDAG.cpp:13027
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition:SelectionDAG.h:1290
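For illustration, getSelectCC folds the compare and the select into one call; the sketch assumes DAG, DL, and two same-typed integer SDValues A and B:
// smax(A, B) expressed as selectcc(A, B, A, B, SETGT).
SDValue Max = DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);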
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition:SelectionDAG.cpp:1806
llvm::SelectionDAG::getScatterVP
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
Definition:SelectionDAG.cpp:9901
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition:SelectionDAG.cpp:2038
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition:SelectionDAG.cpp:10327
llvm::SelectionDAG::getFPExtendOrRound
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
Definition:SelectionDAG.cpp:1475
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
Definition:SelectionDAG.cpp:5672
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition:SelectionDAG.h:701
llvm::SelectionDAG::ComputeNumSignBits
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition:SelectionDAG.cpp:4739
llvm::SelectionDAG::getBoolConstant
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
Definition:SelectionDAG.cpp:1651
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition:SelectionDAG.h:797
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition:SelectionDAG.cpp:1824
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition:SelectionDAG.cpp:11814
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition:SelectionDAG.h:492
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition:SelectionDAG.h:874
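A one-line illustration, assuming DAG, DL, and a 32-bit scalar SDValue Scalar are in scope:
// Splat Scalar across a fixed four-element vector as a BUILD_VECTOR.
SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, DL, Scalar);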
llvm::SelectionDAG::getFrameIndex
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
Definition:SelectionDAG.cpp:1925
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition:SelectionDAG.cpp:3415
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition:SelectionDAG.cpp:2344
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition:SelectionDAG.cpp:1508
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition:SelectionDAG.cpp:2079
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition:SelectionDAG.cpp:2977
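A minimal sketch of how a combine might use MaskedValueIsZero to prove an alignment fact, assuming DAG and a scalar integer SDValue Op are in scope:
// Are the two low bits of Op known to be zero (i.e. Op is a multiple of 4)?
APInt LowTwo = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), 2);
if (DAG.MaskedValueIsZero(Op, LowTwo)) {
  // Safe to drop an explicit mask of the low bits here.
}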
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition:SelectionDAG.h:510
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition:SelectionDAG.cpp:9045
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition:SelectionDAG.cpp:2069
llvm::SelectionDAG::CreateStackTemporary
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition:SelectionDAG.cpp:2776
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition:SelectionDAG.h:768
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition:SelectionDAG.h:580
llvm::SelectionDAG::getMaskedLoad
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
Definition:SelectionDAG.cpp:9945
llvm::SelectionDAG::getSplat
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition:SelectionDAG.h:907
llvm::SelectionDAG::SplitScalar
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
Definition:SelectionDAG.cpp:12946
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition:SelectionDAG.cpp:2147
llvm::SelectionDAG::getLogicalNOT
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
Definition:SelectionDAG.cpp:1626
llvm::SelectionDAG::getMaskedScatter
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
Definition:SelectionDAG.cpp:10087
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition:Instructions.h:1901
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition:Instructions.cpp:1925
llvm::ShuffleVectorInst::isBitRotateMask
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
Definition:Instructions.cpp:2426
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition:Instructions.h:1941
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition:Instructions.cpp:1788
llvm::ShuffleVectorInst::isDeInterleaveMaskOfFactor
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
Definition:Instructions.cpp:2379
llvm::ShuffleVectorInst::isIdentityMask
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
Definition:Instructions.cpp:1883
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition:Instructions.cpp:1891
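A self-contained illustration of these static mask predicates (the mask values are arbitrary):
#include "llvm/IR/Instructions.h"
static bool demoReverseMask() {
  int Mask[] = {3, 2, 1, 0};                                              // reverses a 4-element vector
  return llvm::ShuffleVectorInst::isReverseMask(Mask, /*NumSrcElts=*/4);  // true
}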
llvm::ShuffleVectorInst::isInsertSubvectorMask
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
Definition:Instructions.cpp:2039
llvm::ShuffleVectorInst::isInterleaveMask
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Definition:Instructions.cpp:2295
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition:SelectionDAGNodes.h:1625
llvm::ShuffleVectorSDNode::isSplatMask
static bool isSplatMask(const int *Mask, EVT VT)
Definition:SelectionDAG.cpp:13418
llvm::ShuffleVectorSDNode::getSplatIndex
int getSplatIndex() const
Definition:SelectionDAGNodes.h:1650
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition:SelectionDAGNodes.h:1638
llvm::ShuffleVectorSDNode::isSplat
bool isSplat() const
Definition:SelectionDAGNodes.h:1648
llvm::SmallDenseMap
Definition:DenseMap.h:883
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition:SmallPtrSet.h:519
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition:SmallSet.h:132
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition:SmallSet.h:175
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition:SmallSet.h:181
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition:SmallVector.h:673
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition:SmallVector.h:663
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition:SmallVector.h:683
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVectorTemplateCommon::end
iterator end()
Definition:SmallVector.h:269
llvm::SmallVectorTemplateCommon::front
reference front()
Definition:SmallVector.h:299
llvm::SmallVectorTemplateCommon::data
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition:SmallVector.h:286
llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition:SmallVector.h:267
llvm::SmallVectorTemplateCommon::back
reference back()
Definition:SmallVector.h:308
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
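A small, self-contained sketch of the SmallVector interface summarized above (the values are illustrative):
#include "llvm/ADT/SmallVector.h"
static int demoSmallVector() {
  llvm::SmallVector<int, 4> Ops;   // inline storage for 4 elements, no heap allocation yet
  Ops.reserve(3);
  Ops.push_back(1);
  Ops.push_back(2);
  Ops.emplace_back(3);
  int Last = Ops.pop_back_val();                // removes and returns 3
  return Last + static_cast<int>(Ops.size());   // 3 + 2 == 5
}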
llvm::StoreInst
An instruction for storing to memory.
Definition:Instructions.h:292
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition:SelectionDAGNodes.h:2492
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition:StringRef.h:150
llvm::StringRef::lower
std::string lower() const
Definition:StringRef.cpp:113
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition:StringSwitch.h:44
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition:StringSwitch.h:69
llvm::StringSwitch::Default
R Default(T Value)
Definition:StringSwitch.h:182
llvm::StringSwitch::Cases
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition:StringSwitch.h:90
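A minimal, self-contained example of the StringSwitch idiom (strings and values are illustrative, not taken from this file):
#include "llvm/ADT/StringSwitch.h"
static int parseRoundingMode(llvm::StringRef S) {
  return llvm::StringSwitch<int>(S)
      .Case("rne", 0)
      .Case("rtz", 1)
      .Cases("rdn", "down", 2)
      .Default(-1);   // unknown string
}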
llvm::StructType::get
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition:Type.cpp:406
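For example, a literal two-field struct type can be built directly from a context; the sketch assumes an LLVMContext &Ctx is available:
llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
llvm::StructType *Pair = llvm::StructType::get(Ctx, {I64, I64});   // literal { i64, i64 }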
llvm::TargetExtType::get
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition:Type.cpp:895
llvm::TargetFrameLowering::getStackAlign
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition:TargetFrameLowering.h:105
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition:TargetInstrInfo.h:112
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition:TargetLowering.h:2493
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition:TargetLowering.h:2562
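A hedged sketch of the constructor idiom these hooks support, written for a generic TargetLowering subclass rather than the actual RISC-V configuration (MyTarget::GPRRegClass is a placeholder name):
// Inside a hypothetical TargetLowering subclass constructor:
addRegisterClass(MVT::i64, &MyTarget::GPRRegClass);       // make i64 legal in this register class
setOperationAction(ISD::SELECT, MVT::i64, Custom);        // lower SELECT in LowerOperation
setOperationAction(ISD::ROTL,   MVT::i64, Expand);        // let legalization expand rotates
computeRegisterProperties(Subtarget.getRegisterInfo());   // finalize derived properties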
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition:TargetLowering.h:3757
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition:TargetLowering.h:1677
llvm::TargetLoweringBase::Custom
@ Custom
Definition:TargetLowering.h:204
llvm::TargetLoweringBase::Expand
@ Expand
Definition:TargetLowering.h:202
llvm::TargetLoweringBase::Promote
@ Promote
Definition:TargetLowering.h:201
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition:TargetLowering.h:203
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition:TargetLowering.h:3718
llvm::TargetLoweringBase::emitPatchPoint
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
Definition:TargetLoweringBase.cpp:1155
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition:TargetLowering.h:1042
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition:TargetLowering.h:364
llvm::TargetLoweringBase::MaxLoadsPerMemcmp
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
Definition:TargetLowering.h:3737
llvm::TargetLoweringBase::getNumRegistersForCallingConv
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
Definition:TargetLowering.h:1795
llvm::TargetLoweringBase::MaxGluedStoresPerMemcpy
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
Definition:TargetLowering.h:3724
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition:TargetLowering.h:3066
llvm::TargetLoweringBase::getRegisterTypeForCallingConv
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
Definition:TargetLowering.h:1787
llvm::TargetLoweringBase::setOperationPromotedToType
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
Definition:TargetLowering.h:2716
llvm::TargetLoweringBase::TypeSoftenFloat
@ TypeSoftenFloat
Definition:TargetLowering.h:213
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition:TargetLowering.h:216
llvm::TargetLoweringBase::getMinCmpXchgSizeInBits
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
Definition:TargetLowering.h:2163
llvm::TargetLoweringBase::getNumRegisters
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
Definition:TargetLowering.h:1763
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition:TargetLowering.h:2635
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition:TargetLowering.h:2752
llvm::TargetLoweringBase::setMaxAtomicSizeInBitsSupported
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Definition:TargetLowering.h:2766
llvm::TargetLoweringBase::getVectorTypeBreakdownForCallingConv
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Definition:TargetLowering.h:1195
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition:TargetLowering.h:2739
llvm::TargetLoweringBase::isOperationCustom
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Definition:TargetLowering.h:1380
llvm::TargetLoweringBase::MaxStoresPerMemsetOptSize
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Definition:TargetLowering.h:3703
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition:TargetLowering.h:2479
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition:TargetLowering.h:3751
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition:TargetLoweringBase.cpp:1275
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition:TargetLowering.h:2972
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition:TargetLowering.h:3753
llvm::TargetLoweringBase::shouldFoldSelectWithSingleBitTest
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
Definition:TargetLowering.h:3390
llvm::TargetLoweringBase::getIRStackGuard
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
Definition:TargetLoweringBase.cpp:1956
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition:TargetLowering.h:2545
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition:TargetLowering.h:1093
llvm::TargetLoweringBase::EnableExtLdPromotion
bool EnableExtLdPromotion
Definition:TargetLowering.h:3760
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition:TargetLowering.h:2652
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition:TargetLowering.h:371
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition:TargetLowering.h:3431
llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition:TargetLowering.h:2745
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition:TargetLowering.h:1447
llvm::TargetLoweringBase::MaxStoresPerMemset
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
Definition:TargetLowering.h:3701
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition:TargetLowering.h:2625
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition:TargetLowering.h:236
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition:TargetLowering.h:1339
llvm::TargetLoweringBase::MaxLoadsPerMemcmpOptSize
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
Definition:TargetLowering.h:3739
llvm::TargetLoweringBase::isBinOp
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
Definition:TargetLowering.h:2941
llvm::TargetLoweringBase::setMinCmpXchgSizeInBits
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
Definition:TargetLowering.h:2783
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
Definition:TargetLowering.h:2511
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition:TargetLowering.h:253
llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
llvm::TargetLoweringBase::AtomicExpansionKind::None
@ None
llvm::TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic
@ MaskedIntrinsic
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition:TargetLowering.h:2686
llvm::TargetLoweringBase::findRepresentativeClass
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
Definition:TargetLoweringBase.cpp:1248
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition:TargetLowering.h:2731
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition:TargetLowering.h:2579
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition:TargetLowering.h:1143
llvm::TargetLoweringBase::IsStrictFPEnabled
bool IsStrictFPEnabled
Definition:TargetLowering.h:3772
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition:TargetLowering.h:329
llvm::TargetLoweringBase::allowsMemoryAccessForAlignment
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
Definition:TargetLoweringBase.cpp:1708
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition:TargetLowering.h:3716
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition:TargetLowering.h:1367
llvm::TargetLoweringObjectFile
Definition:TargetLoweringObjectFile.h:45
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition:TargetLowering.h:3780
llvm::TargetLowering::expandAddSubSat
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
Definition:TargetLowering.cpp:10670
llvm::TargetLowering::buildSDIVPow2WithCMov
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
Definition:TargetLowering.cpp:6292
llvm::TargetLowering::ConstraintType
ConstraintType
Definition:TargetLowering.h:4950
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition:TargetLowering.h:4952
llvm::TargetLowering::C_Memory
@ C_Memory
Definition:TargetLowering.h:4953
llvm::TargetLowering::C_Immediate
@ C_Immediate
Definition:TargetLowering.h:4955
llvm::TargetLowering::C_Other
@ C_Other
Definition:TargetLowering.h:4956
llvm::TargetLowering::makeLibCall
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Definition:TargetLowering.cpp:147
llvm::TargetLowering::expandIndirectJTBranch
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
Definition:TargetLowering.cpp:478
llvm::TargetLowering::getInlineAsmMemConstraint
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
Definition:TargetLowering.h:5062
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition:TargetLowering.cpp:5525
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition:TargetLowering.cpp:10526
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition:SelectionDAGBuilder.cpp:10958
llvm::TargetLowering::isPositionIndependent
bool isPositionIndependent() const
Definition:TargetLowering.cpp:47
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition:TargetLowering.cpp:5669
llvm::TargetLowering::SimplifyDemandedBits
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Definition:TargetLowering.cpp:1134
llvm::TargetLowering::verifyReturnAddressArgumentIsConstant
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:7260
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition:TargetLowering.cpp:5587
llvm::TargetLowering::getJumpTableEncoding
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Definition:TargetLowering.cpp:444
llvm::TargetLowering::canCreateUndefOrPoisonForTargetNode
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Definition:TargetLowering.cpp:3908
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition:TargetMachine.h:77
llvm::TargetMachine::getTLSModel
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
Definition:TargetMachine.cpp:238
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition:TargetMachine.h:126
llvm::TargetMachine::useTLSDESC
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
Definition:TargetMachine.cpp:236
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition:TargetMachine.h:216
llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition:TargetMachine.cpp:235
llvm::TargetMachine::getObjFileLowering
virtual TargetLoweringObjectFile * getObjFileLowering() const
Definition:TargetMachine.h:136
llvm::TargetRegisterClass
Definition:TargetRegisterInfo.h:44
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition:TargetRegisterInfo.h:235
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition:TargetSubtargetInfo.h:129
llvm::TargetSubtargetInfo::isRegisterReservedByUser
virtual bool isRegisterReservedByUser(Register R) const
Definition:TargetSubtargetInfo.h:354
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition:TargetSubtargetInfo.h:97
llvm::Target
Target - Wrapper for Target specific information.
Definition:TargetRegistry.h:144
llvm::Triple::isOSBinFormatCOFF
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition:Triple.h:755
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition:Twine.h:81
llvm::TypeSize
Definition:TypeSize.h:334
llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition:TypeSize.h:345
llvm::TypeSize::getScalable
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition:TypeSize.h:348
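An illustrative comparison of fixed and scalable quantities using the helpers above:
#include "llvm/Support/TypeSize.h"
static void demoTypeSize() {
  llvm::TypeSize Fixed    = llvm::TypeSize::getFixed(128);    // exactly 128 bits
  llvm::TypeSize Scalable = llvm::TypeSize::getScalable(64);  // 64 * vscale bits
  bool Mult32  = Fixed.isKnownMultipleOf(32);                 // true
  uint64_t Min = Scalable.getKnownMinValue();                 // 64 (minimum; vscale >= 1)
  (void)Mult32; (void)Min;
}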
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
llvm::Type::getStructElementType
Type * getStructElementType(unsigned N) const
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition:Type.h:258
llvm::Type::isTargetExtTy
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition:Type.h:203
llvm::Type::isScalableTy
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition:Type.h:128
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition:Type.h:355
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition:Use.h:43
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition:Use.h:72
llvm::Use::getOperandNo
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition:Use.cpp:31
llvm::User
Definition:User.h:44
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition:User.h:228
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition:User.h:250
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition:Value.h:434
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition:Value.cpp:534
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition:Value.cpp:1075
llvm::VectorType
Base class of all SIMD vector types.
Definition:DerivedTypes.h:427
llvm::cl::opt
Definition:CommandLine.h:1423
llvm::details::FixedOrScalableQuantity::isKnownMultipleOf
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition:TypeSize.h:183
llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition:TypeSize.h:202
llvm::details::FixedOrScalableQuantity< TypeSize, uint64_t >::isKnownLE
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition:TypeSize.h:232
llvm::details::FixedOrScalableQuantity::multiplyCoefficientBy
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition:TypeSize.h:258
llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition:TypeSize.h:168
llvm::details::FixedOrScalableQuantity::isZero
constexpr bool isZero() const
Definition:TypeSize.h:156
llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition:TypeSize.h:254
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition:ilist_node.h:132
uint16_t
uint32_t
uint64_t
uint8_t
unsigned
INT64_MIN
#define INT64_MIN
Definition:DataTypes.h:74
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
false
Definition:StackSlotColoring.cpp:193
llvm::AArch64::RM
@ RM
Definition:AArch64ISelLowering.h:542
llvm::AArch64::Fixups
Fixups
Definition:AArch64FixupKinds.h:17
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition:AMDGPUMetadata.h:395
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::AMDGPU::Imm
@ Imm
Definition:AMDGPURegBankLegalizeRules.h:105
llvm::ARCISD::CMOV
@ CMOV
Definition:ARCISelLowering.h:43
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition:BitmaskEnum.h:125
llvm::COFF::Entry
@ Entry
Definition:COFF.h:844
llvm::CallingConv::RISCV_VectorCall
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition:CallingConv.h:268
llvm::CallingConv::GHC
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition:CallingConv.h:50
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition:CallingConv.h:144
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition:CallingConv.h:41
llvm::CallingConv::Tail
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition:CallingConv.h:76
llvm::CallingConv::GRAAL
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition:CallingConv.h:255
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::CodeModel::Medium
@ Medium
Definition:CodeGen.h:31
llvm::CodeModel::Large
@ Large
Definition:CodeGen.h:31
llvm::CodeModel::Small
@ Small
Definition:CodeGen.h:31
llvm::IRSimilarity::Legal
@ Legal
Definition:IRSimilarityIdentifier.h:76
llvm::ISD::isConstantSplatVectorAllOnes
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
Definition:SelectionDAG.cpp:188
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition:SelectionDAGNodes.h:3215
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition:ISDOpcodes.h:40
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition:ISDOpcodes.h:780
llvm::ISD::STACKRESTORE
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition:ISDOpcodes.h:1197
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition:ISDOpcodes.h:1193
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition:ISDOpcodes.h:753
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition:ISDOpcodes.h:491
llvm::ISD::STORE
@ STORE
Definition:ISDOpcodes.h:1103
llvm::ISD::LRINT
@ LRINT
Definition:ISDOpcodes.h:1021
llvm::ISD::DELETED_NODE
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition:ISDOpcodes.h:44
llvm::ISD::VECREDUCE_SEQ_FADD
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition:ISDOpcodes.h:1417
llvm::ISD::FP_TO_BF16
@ FP_TO_BF16
Definition:ISDOpcodes.h:974
llvm::ISD::JumpTable
@ JumpTable
Definition:ISDOpcodes.h:81
llvm::ISD::FLOG10
@ FLOG10
Definition:ISDOpcodes.h:1008
llvm::ISD::MLOAD
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition:ISDOpcodes.h:1360
llvm::ISD::VECREDUCE_SMIN
@ VECREDUCE_SMIN
Definition:ISDOpcodes.h:1450
llvm::ISD::SREM
@ SREM
Definition:ISDOpcodes.h:251
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition:ISDOpcodes.h:257
llvm::ISD::ATOMIC_LOAD_NAND
@ ATOMIC_LOAD_NAND
Definition:ISDOpcodes.h:1340
llvm::ISD::UDIV
@ UDIV
Definition:ISDOpcodes.h:250
llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition:ISDOpcodes.h:574
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition:ISDOpcodes.h:842
llvm::ISD::UMIN
@ UMIN
Definition:ISDOpcodes.h:699
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition:ISDOpcodes.h:744
llvm::ISD::ROTR
@ ROTR
Definition:ISDOpcodes.h:739
llvm::ISD::FPOW
@ FPOW
Definition:ISDOpcodes.h:994
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition:ISDOpcodes.h:1226
llvm::ISD::ConstantFP
@ ConstantFP
Definition:ISDOpcodes.h:77
llvm::ISD::ATOMIC_LOAD_MAX
@ ATOMIC_LOAD_MAX
Definition:ISDOpcodes.h:1342
llvm::ISD::UADDO
@ UADDO
Definition:ISDOpcodes.h:331
llvm::ISD::FTRUNC
@ FTRUNC
Definition:ISDOpcodes.h:1013
llvm::ISD::SDIV
@ SDIV
Definition:ISDOpcodes.h:249
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition:ISDOpcodes.h:441
llvm::ISD::ATOMIC_LOAD_UMIN
@ ATOMIC_LOAD_UMIN
Definition:ISDOpcodes.h:1343
llvm::ISD::LLRINT
@ LLRINT
Definition:ISDOpcodes.h:1022
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition:ISDOpcodes.h:246
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition:ISDOpcodes.h:1102
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition:ISDOpcodes.h:412
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition:ISDOpcodes.h:814
llvm::ISD::FSUB
@ FSUB
Definition:ISDOpcodes.h:398
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition:ISDOpcodes.h:498
llvm::ISD::FABS
@ FABS
Definition:ISDOpcodes.h:982
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition:ISDOpcodes.h:1015
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition:ISDOpcodes.h:205
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition:ISDOpcodes.h:101
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition:ISDOpcodes.h:78
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition:ISDOpcodes.h:841
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition:ISDOpcodes.h:558
llvm::ISD::VECREDUCE_FMAX
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition:ISDOpcodes.h:1435
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition:ISDOpcodes.h:397
llvm::ISD::VECREDUCE_FMAXIMUM
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition:ISDOpcodes.h:1439
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition:ISDOpcodes.h:717
llvm::ISD::MEMBARRIER
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition:ISDOpcodes.h:1299
llvm::ISD::FEXP10
@ FEXP10
Definition:ISDOpcodes.h:1011
llvm::ISD::ATOMIC_FENCE
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition:ISDOpcodes.h:1304
llvm::ISD::FP_TO_FP16
@ FP_TO_FP16
Definition:ISDOpcodes.h:965
llvm::ISD::UDIVREM
@ UDIVREM
Definition:ISDOpcodes.h:263
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition:ISDOpcodes.h:262
llvm::ISD::VECREDUCE_SMAX
@ VECREDUCE_SMAX
Definition:ISDOpcodes.h:1449
llvm::ISD::SRL
@ SRL
Definition:ISDOpcodes.h:737
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition:ISDOpcodes.h:492
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition:ISDOpcodes.h:1051
llvm::ISD::FP16_TO_FP
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition:ISDOpcodes.h:964
llvm::ISD::ATOMIC_LOAD_OR
@ ATOMIC_LOAD_OR
Definition:ISDOpcodes.h:1338
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition:ISDOpcodes.h:954
llvm::ISD::STRICT_FDIV
@ STRICT_FDIV
Definition:ISDOpcodes.h:410
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition:ISDOpcodes.h:236
llvm::ISD::ATOMIC_LOAD_XOR
@ ATOMIC_LOAD_XOR
Definition:ISDOpcodes.h:1339
llvm::ISD::FFLOOR
@ FFLOOR
Definition:ISDOpcodes.h:1018
llvm::ISD::INIT_TRAMPOLINE
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition:ISDOpcodes.h:1270
llvm::ISD::FLDEXP
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition:ISDOpcodes.h:997
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition:ISDOpcodes.h:418
llvm::ISD::BUILTIN_OP_END
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition:ISDOpcodes.h:1494
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition:ISDOpcodes.h:79
llvm::ISD::SRA
@ SRA
Definition:ISDOpcodes.h:736
llvm::ISD::STRICT_FMUL
@ STRICT_FMUL
Definition:ISDOpcodes.h:409
llvm::ISD::LLROUND
@ LLROUND
Definition:ISDOpcodes.h:1020
llvm::ISD::SET_ROUNDING
@ SET_ROUNDING
Set rounding mode.
Definition:ISDOpcodes.h:936
llvm::ISD::USUBO
@ USUBO
Definition:ISDOpcodes.h:335
llvm::ISD::AVGFLOORU
@ AVGFLOORU
Definition:ISDOpcodes.h:681
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition:ISDOpcodes.h:805
llvm::ISD::FLOG2
@ FLOG2
Definition:ISDOpcodes.h:1007
llvm::ISD::AVGCEILS
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition:ISDOpcodes.h:685
llvm::ISD::STRICT_UINT_TO_FP
@ STRICT_UINT_TO_FP
Definition:ISDOpcodes.h:465
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition:ISDOpcodes.h:635
llvm::ISD::READSTEADYCOUNTER
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition:ISDOpcodes.h:1259
llvm::ISD::VECREDUCE_FADD
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition:ISDOpcodes.h:1432
llvm::ISD::UADDSAT
@ UADDSAT
Definition:ISDOpcodes.h:348
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition:ISDOpcodes.h:752
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition:ISDOpcodes.h:1032
llvm::ISD::FPOWI
@ FPOWI
Definition:ISDOpcodes.h:995
llvm::ISD::FRINT
@ FRINT
Definition:ISDOpcodes.h:1014
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition:ISDOpcodes.h:1292
llvm::ISD::VECREDUCE_FMIN
@ VECREDUCE_FMIN
Definition:ISDOpcodes.h:1436
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition:ISDOpcodes.h:1059
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition:ISDOpcodes.h:446
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition:ISDOpcodes.h:981
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition:ISDOpcodes.h:1148
llvm::ISD::CTTZ
@ CTTZ
Definition:ISDOpcodes.h:745
llvm::ISD::ATOMIC_LOAD_MIN
@ ATOMIC_LOAD_MIN
Definition:ISDOpcodes.h:1341
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition:ISDOpcodes.h:888
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition:ISDOpcodes.h:1127
llvm::ISD::VECTOR_INTERLEAVE
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition:ISDOpcodes.h:601
llvm::ISD::STEP_VECTOR
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition:ISDOpcodes.h:661
llvm::ISD::OR
@ OR
Definition:ISDOpcodes.h:710
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition:ISDOpcodes.h:515
llvm::ISD::IS_FPCLASS
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition:ISDOpcodes.h:522
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition:ISDOpcodes.h:356
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition:ISDOpcodes.h:795
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition:ISDOpcodes.h:757
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition:ISDOpcodes.h:258
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition:ISDOpcodes.h:218
llvm::ISD::VECREDUCE_UMAX
@ VECREDUCE_UMAX
Definition:ISDOpcodes.h:1451
llvm::ISD::SPLAT_VECTOR
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition:ISDOpcodes.h:642
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition:ISDOpcodes.h:1222
llvm::ISD::AVGCEILU
@ AVGCEILU
Definition:ISDOpcodes.h:686
llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition:ISDOpcodes.h:330
llvm::ISD::FROUND
@ FROUND
Definition:ISDOpcodes.h:1016
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition:ISDOpcodes.h:445
llvm::ISD::USUBSAT
@ USUBSAT
Definition:ISDOpcodes.h:357
llvm::ISD::VECREDUCE_ADD
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition:ISDOpcodes.h:1444
llvm::ISD::GET_ROUNDING
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition:ISDOpcodes.h:931
llvm::ISD::STRICT_FP_TO_FP16
@ STRICT_FP_TO_FP16
Definition:ISDOpcodes.h:967
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition:ISDOpcodes.h:674
llvm::ISD::STRICT_FP16_TO_FP
@ STRICT_FP16_TO_FP
Definition:ISDOpcodes.h:966
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition:ISDOpcodes.h:735
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition:ISDOpcodes.h:615
llvm::ISD::ATOMIC_LOAD_AND
@ ATOMIC_LOAD_AND
Definition:ISDOpcodes.h:1336
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition:ISDOpcodes.h:588
llvm::ISD::FCOS
@ FCOS
Definition:ISDOpcodes.h:986
llvm::ISD::XOR
@ XOR
Definition:ISDOpcodes.h:711
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition:ISDOpcodes.h:550
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition:ISDOpcodes.h:209
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition:ISDOpcodes.h:811
llvm::ISD::DEBUGTRAP
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition:ISDOpcodes.h:1282
llvm::ISD::FP_TO_UINT_SAT
@ FP_TO_UINT_SAT
Definition:ISDOpcodes.h:907
llvm::ISD::CTPOP
@ CTPOP
Definition:ISDOpcodes.h:747
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition:ISDOpcodes.h:772
llvm::ISD::FMUL
@ FMUL
Definition:ISDOpcodes.h:399
llvm::ISD::MSTORE
@ MSTORE
Definition:ISDOpcodes.h:1361
llvm::ISD::VSCALE
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition:ISDOpcodes.h:1407
llvm::ISD::VECREDUCE_XOR
@ VECREDUCE_XOR
Definition:ISDOpcodes.h:1448
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition:ISDOpcodes.h:1319
llvm::ISD::ATOMIC_LOAD_UMAX
@ ATOMIC_LOAD_UMAX
Definition:ISDOpcodes.h:1344
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition:ISDOpcodes.h:796
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition:ISDOpcodes.h:1031
llvm::ISD::SUB
@ SUB
Definition:ISDOpcodes.h:247
llvm::ISD::MULHS
@ MULHS
Definition:ISDOpcodes.h:675
llvm::ISD::VECREDUCE_AND
@ VECREDUCE_AND
Definition:ISDOpcodes.h:1446
llvm::ISD::DYNAMIC_STACKALLOC
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition:ISDOpcodes.h:1112
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition:ISDOpcodes.h:448
llvm::ISD::ConstantPool
@ ConstantPool
Definition:ISDOpcodes.h:82
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition:ISDOpcodes.h:849
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition:ISDOpcodes.h:697
llvm::ISD::Constant
@ Constant
Definition:ISDOpcodes.h:76
llvm::ISD::VECTOR_REVERSE
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition:ISDOpcodes.h:606
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition:ISDOpcodes.h:939
llvm::ISD::VECREDUCE_OR
@ VECREDUCE_OR
Definition:ISDOpcodes.h:1447
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition:ISDOpcodes.h:443
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition:ISDOpcodes.h:766
llvm::ISD::MSCATTER
@ MSCATTER
Definition:ISDOpcodes.h:1373
llvm::ISD::STRICT_SINT_TO_FP
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition:ISDOpcodes.h:464
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition:ISDOpcodes.h:1017
llvm::ISD::MGATHER
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition:ISDOpcodes.h:1372
llvm::ISD::VECREDUCE_UMIN
@ VECREDUCE_UMIN
Definition:ISDOpcodes.h:1452
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition:ISDOpcodes.h:442
llvm::ISD::STRICT_FROUNDEVEN
@ STRICT_FROUNDEVEN
Definition:ISDOpcodes.h:444
llvm::ISD::EH_DWARF_CFA
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition:ISDOpcodes.h:135
llvm::ISD::FDIV
@ FDIV
Definition:ISDOpcodes.h:400
llvm::ISD::BF16_TO_FP
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition:ISDOpcodes.h:973
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition:ISDOpcodes.h:100
llvm::ISD::FREM
@ FREM
Definition:ISDOpcodes.h:401
llvm::ISD::ATOMIC_LOAD_ADD
@ ATOMIC_LOAD_ADD
Definition:ISDOpcodes.h:1334
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition:ISDOpcodes.h:458
llvm::ISD::STRICT_FP_ROUND
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition:ISDOpcodes.h:480
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition:ISDOpcodes.h:457
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition:ISDOpcodes.h:1050
llvm::ISD::ATOMIC_LOAD_SUB
@ ATOMIC_LOAD_SUB
Definition:ISDOpcodes.h:1335
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition:ISDOpcodes.h:887
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition:ISDOpcodes.h:1253
llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition:ISDOpcodes.h:485
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition:ISDOpcodes.h:709
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition:ISDOpcodes.h:1279
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition:ISDOpcodes.h:190
llvm::ISD::FLOG
@ FLOG
Definition:ISDOpcodes.h:1006
llvm::ISD::AVGFLOORS
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition:ISDOpcodes.h:680
llvm::ISD::STRICT_FADD
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition:ISDOpcodes.h:407
llvm::ISD::UREM
@ UREM
Definition:ISDOpcodes.h:252
llvm::ISD::SPLAT_VECTOR_PARTS
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition:ISDOpcodes.h:651
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition:ISDOpcodes.h:539
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition:ISDOpcodes.h:52
llvm::ISD::FSIN
@ FSIN
Definition:ISDOpcodes.h:985
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition:ISDOpcodes.h:449
llvm::ISD::FEXP
@ FEXP
Definition:ISDOpcodes.h:1009
llvm::ISD::VECTOR_SPLICE
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition:ISDOpcodes.h:627
llvm::ISD::FCEIL
@ FCEIL
Definition:ISDOpcodes.h:1012
llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition:ISDOpcodes.h:408
llvm::ISD::ATOMIC_SWAP
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition:ISDOpcodes.h:1333
llvm::ISD::MUL
@ MUL
Definition:ISDOpcodes.h:248
llvm::ISD::FFREXP
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition:ISDOpcodes.h:1004
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition:ISDOpcodes.h:920
llvm::ISD::VECTOR_COMPRESS
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition:ISDOpcodes.h:669
llvm::ISD::LROUND
@ LROUND
Definition:ISDOpcodes.h:1019
llvm::ISD::CTLZ
@ CTLZ
Definition:ISDOpcodes.h:746
llvm::ISD::FMAXIMUMNUM
@ FMAXIMUMNUM
Definition:ISDOpcodes.h:1056
llvm::ISD::CLEAR_CACHE
@ CLEAR_CACHE
Definition:ISDOpcodes.h:1490
llvm::ISD::STRICT_FLDEXP
@ STRICT_FLDEXP
Definition:ISDOpcodes.h:421
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition:ISDOpcodes.h:447
llvm::ISD::VASTART
@ VASTART
Definition:ISDOpcodes.h:1227
llvm::ISD::FSQRT
@ FSQRT
Definition:ISDOpcodes.h:983
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition:ISDOpcodes.h:438
llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition:ISDOpcodes.h:906
llvm::ISD::VECREDUCE_FMINIMUM
@ VECREDUCE_FMINIMUM
Definition:ISDOpcodes.h:1440
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition:ISDOpcodes.h:817
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition:ISDOpcodes.h:1217
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition:ISDOpcodes.h:1141
llvm::ISD::ROTL
@ ROTL
Definition:ISDOpcodes.h:738
llvm::ISD::BlockAddress
@ BlockAddress
Definition:ISDOpcodes.h:84
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition:ISDOpcodes.h:794
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition:ISDOpcodes.h:748
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition:ISDOpcodes.h:508
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition:ISDOpcodes.h:347
llvm::ISD::FEXP2
@ FEXP2
Definition:ISDOpcodes.h:1010
llvm::ISD::SMAX
@ SMAX
Definition:ISDOpcodes.h:698
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition:ISDOpcodes.h:437
llvm::ISD::VECTOR_DEINTERLEAVE
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition:ISDOpcodes.h:595
llvm::ISD::UMAX
@ UMAX
Definition:ISDOpcodes.h:700
llvm::ISD::FMINIMUMNUM
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition:ISDOpcodes.h:1055
llvm::ISD::TRUNCATE_SSAT_S
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition:ISDOpcodes.h:832
llvm::ISD::ABDS
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition:ISDOpcodes.h:692
llvm::ISD::ADJUST_TRAMPOLINE
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition:ISDOpcodes.h:1276
llvm::ISD::TRUNCATE_USAT_U
@ TRUNCATE_USAT_U
Definition:ISDOpcodes.h:836
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition:ISDOpcodes.h:198
llvm::ISD::ABDU
@ ABDU
Definition:ISDOpcodes.h:693
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition:ISDOpcodes.h:530
llvm::ISD::isBuildVectorOfConstantSDNodes
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
Definition:SelectionDAG.cpp:287
llvm::ISD::isNormalStore
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Definition:SelectionDAGNodes.h:3246
llvm::ISD::isConstantSplatVectorAllZeros
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
Definition:SelectionDAG.cpp:237
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition:SelectionDAG.cpp:639
llvm::ISD::getVPMaskIdx
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
Definition:SelectionDAG.cpp:553
llvm::ISD::getVPExplicitVectorLengthIdx
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
Definition:SelectionDAG.cpp:565
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition:SelectionDAG.cpp:616
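As a rough illustration of the two CondCode helpers listed above (getSetCCInverse and getSetCCSwappedOperands), the following standalone sketch is assumed, not taken from this file; the helper name invertThenSwap is hypothetical.
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Hypothetical helper: !(X < Y) is (X >= Y), and swapping the operands of
// (X >= Y) yields (Y <= X), so SETLT maps to SETLE overall.
static ISD::CondCode invertThenSwap(ISD::CondCode CC, EVT VT) {
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, VT);
  return ISD::getSetCCSwappedOperands(Inv);
}
// e.g. invertThenSwap(ISD::SETLT, MVT::i32) == ISD::SETLE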
llvm::ISD::MemIndexType
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition:ISDOpcodes.h:1572
llvm::ISD::UNSIGNED_SCALED
@ UNSIGNED_SCALED
Definition:ISDOpcodes.h:1572
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition:SelectionDAG.cpp:283
llvm::ISD::isConstantSplatVector
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
Definition:SelectionDAG.cpp:153
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition:ISDOpcodes.h:1559
llvm::ISD::POST_INC
@ POST_INC
Definition:ISDOpcodes.h:1559
llvm::ISD::PRE_INC
@ PRE_INC
Definition:ISDOpcodes.h:1559
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition:ISDOpcodes.h:1559
llvm::ISD::isBuildVectorOfConstantFPSDNodes
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
Definition:SelectionDAG.cpp:300
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition:ISDOpcodes.h:1610
llvm::ISD::SETOEQ
@ SETOEQ
Definition:ISDOpcodes.h:1613
llvm::ISD::SETUNE
@ SETUNE
Definition:ISDOpcodes.h:1626
llvm::ISD::SETUEQ
@ SETUEQ
Definition:ISDOpcodes.h:1621
llvm::ISD::SETOLE
@ SETOLE
Definition:ISDOpcodes.h:1617
llvm::ISD::SETOLT
@ SETOLT
Definition:ISDOpcodes.h:1616
llvm::ISD::SETNE
@ SETNE
Definition:ISDOpcodes.h:1635
llvm::ISD::SETUGT
@ SETUGT
Definition:ISDOpcodes.h:1622
llvm::ISD::SETOGT
@ SETOGT
Definition:ISDOpcodes.h:1614
llvm::ISD::SETULT
@ SETULT
Definition:ISDOpcodes.h:1624
llvm::ISD::SETUO
@ SETUO
Definition:ISDOpcodes.h:1620
llvm::ISD::SETONE
@ SETONE
Definition:ISDOpcodes.h:1618
llvm::ISD::SETGT
@ SETGT
Definition:ISDOpcodes.h:1631
llvm::ISD::SETLT
@ SETLT
Definition:ISDOpcodes.h:1633
llvm::ISD::SETO
@ SETO
Definition:ISDOpcodes.h:1619
llvm::ISD::SETGE
@ SETGE
Definition:ISDOpcodes.h:1632
llvm::ISD::SETUGE
@ SETUGE
Definition:ISDOpcodes.h:1623
llvm::ISD::SETLE
@ SETLE
Definition:ISDOpcodes.h:1634
llvm::ISD::SETULE
@ SETULE
Definition:ISDOpcodes.h:1625
llvm::ISD::SETOGE
@ SETOGE
Definition:ISDOpcodes.h:1615
llvm::ISD::SETEQ
@ SETEQ
Definition:ISDOpcodes.h:1630
llvm::ISD::isBuildVectorAllOnes
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
Definition:SelectionDAG.cpp:279
llvm::ISD::getVecReduceBaseOpcode
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
Definition:SelectionDAG.cpp:448
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition:ISDOpcodes.h:1590
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::isVPOpcode
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
Definition:SelectionDAG.cpp:504
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition:SelectionDAGNodes.h:3208
llvm::ISD::isIntEqualitySetCC
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition:ISDOpcodes.h:1655
llvm::Intrinsic::getOrInsertDeclaration
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition:Intrinsics.cpp:732
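A minimal sketch (assumed example, not part of this file) of calling getOrInsertDeclaration with the signature shown above; declareCtpopI32 is a hypothetical helper name.
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Materialize (or look up) a declaration of llvm.ctpop.i32 in module M.
Function *declareCtpopI32(Module &M) {
  Type *I32 = Type::getInt32Ty(M.getContext());
  return Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ctpop, {I32});
}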
llvm::Intrinsic::ID
unsigned ID
Definition:GenericSSAContext.h:28
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition:LegacyLegalizerInfo.h:55
llvm::M68k::MemAddrModeKind::V
@ V
llvm::M68k::MemAddrModeKind::L
@ L
llvm::MipsISD::Ext
@ Ext
Definition:MipsISelLowering.h:157
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition:NVPTX.h:162
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition:NVPTX.h:163
llvm::NVPTX::Const
@ Const
Definition:NVPTX.h:147
llvm::RISCVABI::ABI
ABI
Definition:RISCVBaseInfo.h:503
llvm::RISCVABI::ABI_ILP32D
@ ABI_ILP32D
Definition:RISCVBaseInfo.h:506
llvm::RISCVABI::ABI_LP64F
@ ABI_LP64F
Definition:RISCVBaseInfo.h:509
llvm::RISCVABI::ABI_ILP32F
@ ABI_ILP32F
Definition:RISCVBaseInfo.h:505
llvm::RISCVABI::ABI_ILP32
@ ABI_ILP32
Definition:RISCVBaseInfo.h:504
llvm::RISCVABI::ABI_Unknown
@ ABI_Unknown
Definition:RISCVBaseInfo.h:512
llvm::RISCVABI::ABI_ILP32E
@ ABI_ILP32E
Definition:RISCVBaseInfo.h:507
llvm::RISCVABI::ABI_LP64E
@ ABI_LP64E
Definition:RISCVBaseInfo.h:511
llvm::RISCVABI::ABI_LP64
@ ABI_LP64
Definition:RISCVBaseInfo.h:508
llvm::RISCVABI::ABI_LP64D
@ ABI_LP64D
Definition:RISCVBaseInfo.h:510
llvm::RISCVCC::CondCode
CondCode
Definition:RISCVInstrInfo.h:37
llvm::RISCVFPRndMode::RoundingMode
RoundingMode
Definition:RISCVBaseInfo.h:380
llvm::RISCVFPRndMode::RUP
@ RUP
Definition:RISCVBaseInfo.h:384
llvm::RISCVFPRndMode::DYN
@ DYN
Definition:RISCVBaseInfo.h:386
llvm::RISCVFPRndMode::RTZ
@ RTZ
Definition:RISCVBaseInfo.h:382
llvm::RISCVFPRndMode::RDN
@ RDN
Definition:RISCVBaseInfo.h:383
llvm::RISCVFPRndMode::RMM
@ RMM
Definition:RISCVBaseInfo.h:385
llvm::RISCVFPRndMode::Invalid
@ Invalid
Definition:RISCVBaseInfo.h:387
llvm::RISCVFPRndMode::RNE
@ RNE
Definition:RISCVBaseInfo.h:381
llvm::RISCVII::MASK_AGNOSTIC
@ MASK_AGNOSTIC
Definition:RISCVTargetParser.h:83
llvm::RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
Definition:RISCVTargetParser.h:81
llvm::RISCVII::TAIL_AGNOSTIC
@ TAIL_AGNOSTIC
Definition:RISCVTargetParser.h:82
llvm::RISCVII::getLMul
static VLMUL getLMul(uint64_t TSFlags)
Definition:RISCVBaseInfo.h:148
llvm::RISCVII::MO_TPREL_HI
@ MO_TPREL_HI
Definition:RISCVBaseInfo.h:279
llvm::RISCVII::MO_CALL
@ MO_CALL
Definition:RISCVBaseInfo.h:272
llvm::RISCVII::MO_TPREL_LO
@ MO_TPREL_LO
Definition:RISCVBaseInfo.h:278
llvm::RISCVII::MO_HI
@ MO_HI
Definition:RISCVBaseInfo.h:274
llvm::RISCVII::MO_LO
@ MO_LO
Definition:RISCVBaseInfo.h:273
llvm::RISCVII::MO_TPREL_ADD
@ MO_TPREL_ADD
Definition:RISCVBaseInfo.h:280
llvm::RISCVII::getFRMOpNum
static int getFRMOpNum(const MCInstrDesc &Desc)
Definition:RISCVBaseInfo.h:233
llvm::RISCVII::VLMUL
VLMUL
Definition:RISCVTargetParser.h:69
llvm::RISCVII::LMUL_1
@ LMUL_1
Definition:RISCVTargetParser.h:70
llvm::RISCVII::LMUL_F8
@ LMUL_F8
Definition:RISCVTargetParser.h:75
llvm::RISCVII::LMUL_4
@ LMUL_4
Definition:RISCVTargetParser.h:72
llvm::RISCVII::LMUL_8
@ LMUL_8
Definition:RISCVTargetParser.h:73
llvm::RISCVII::LMUL_F4
@ LMUL_F4
Definition:RISCVTargetParser.h:76
llvm::RISCVII::LMUL_F2
@ LMUL_F2
Definition:RISCVTargetParser.h:77
llvm::RISCVII::LMUL_2
@ LMUL_2
Definition:RISCVTargetParser.h:71
llvm::RISCVII::getSEWOpNum
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
Definition:RISCVBaseInfo.h:217
llvm::RISCVISD::NodeType
NodeType
Definition:RISCVISelLowering.h:31
llvm::RISCVISD::SF_VC_V_FV_SE
@ SF_VC_V_FV_SE
Definition:RISCVISelLowering.h:454
llvm::RISCVISD::SRLW
@ SRLW
Definition:RISCVISelLowering.h:94
llvm::RISCVISD::SplitF64
@ SplitF64
Turns a f64 into a pair of i32s.
Definition:RISCVISelLowering.h:69
llvm::RISCVISD::DIVW
@ DIVW
Definition:RISCVISelLowering.h:98
llvm::RISCVISD::SW_GUARDED_BRIND
@ SW_GUARDED_BRIND
Definition:RISCVISelLowering.h:432
llvm::RISCVISD::SF_VC_V_X_SE
@ SF_VC_V_X_SE
Definition:RISCVISelLowering.h:449
llvm::RISCVISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
Definition:RISCVISelLowering.h:44
llvm::RISCVISD::SF_VC_V_VVW_SE
@ SF_VC_V_VVW_SE
Definition:RISCVISelLowering.h:461
llvm::RISCVISD::SHA256SIG0
@ SHA256SIG0
Definition:RISCVISelLowering.h:172
llvm::RISCVISD::VWMULU_VL
@ VWMULU_VL
Definition:RISCVISelLowering.h:348
llvm::RISCVISD::SLLW
@ SLLW
Definition:RISCVISelLowering.h:92
llvm::RISCVISD::WRITE_CSR
@ WRITE_CSR
Definition:RISCVISelLowering.h:418
llvm::RISCVISD::FSGNJX
@ FSGNJX
Definition:RISCVISelLowering.h:150
llvm::RISCVISD::VMV_V_X_VL
@ VMV_V_X_VL
Definition:RISCVISelLowering.h:188
llvm::RISCVISD::VWSUB_W_VL
@ VWSUB_W_VL
Definition:RISCVISelLowering.h:356
llvm::RISCVISD::VMSET_VL
@ VMSET_VL
Definition:RISCVISelLowering.h:387
llvm::RISCVISD::SUB_VL
@ SUB_VL
Definition:RISCVISelLowering.h:276
llvm::RISCVISD::FABS_VL
@ FABS_VL
Definition:RISCVISelLowering.h:316
llvm::RISCVISD::VWADDU_VL
@ VWADDU_VL
Definition:RISCVISelLowering.h:351
llvm::RISCVISD::FCLASS_VL
@ FCLASS_VL
Definition:RISCVISelLowering.h:318
llvm::RISCVISD::STRICT_VFNMSUB_VL
@ STRICT_VFNMSUB_VL
Definition:RISCVISelLowering.h:485
llvm::RISCVISD::STRICT_UINT_TO_FP_VL
@ STRICT_UINT_TO_FP_VL
Definition:RISCVISelLowering.h:490
llvm::RISCVISD::CTTZ_VL
@ CTTZ_VL
Definition:RISCVISelLowering.h:288
llvm::RISCVISD::STRICT_SINT_TO_FP_VL
@ STRICT_SINT_TO_FP_VL
Definition:RISCVISelLowering.h:489
llvm::RISCVISD::CZERO_EQZ
@ CZERO_EQZ
Definition:RISCVISelLowering.h:427
llvm::RISCVISD::SM4ED
@ SM4ED
Definition:RISCVISelLowering.h:173
llvm::RISCVISD::FMUL_VL
@ FMUL_VL
Definition:RISCVISelLowering.h:309
llvm::RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL
@ STRICT_VFCVT_RTZ_XU_F_VL
Definition:RISCVISelLowering.h:493
llvm::RISCVISD::VSLIDE1DOWN_VL
@ VSLIDE1DOWN_VL
Definition:RISCVISelLowering.h:223
llvm::RISCVISD::STRICT_FSETCC_VL
@ STRICT_FSETCC_VL
Definition:RISCVISelLowering.h:494
llvm::RISCVISD::RET_GLUE
@ RET_GLUE
Definition:RISCVISelLowering.h:33
llvm::RISCVISD::FROUND
@ FROUND
Definition:RISCVISelLowering.h:147
llvm::RISCVISD::CTZW
@ CTZW
Definition:RISCVISelLowering.h:108
llvm::RISCVISD::VWSLL_VL
@ VWSLL_VL
Definition:RISCVISelLowering.h:358
llvm::RISCVISD::ROLW
@ ROLW
Definition:RISCVISelLowering.h:103
llvm::RISCVISD::VSEXT_VL
@ VSEXT_VL
Definition:RISCVISelLowering.h:396
llvm::RISCVISD::SMIN_VL
@ SMIN_VL
Definition:RISCVISelLowering.h:280
llvm::RISCVISD::VMAND_VL
@ VMAND_VL
Definition:RISCVISelLowering.h:381
llvm::RISCVISD::FP_EXTEND_VL
@ FP_EXTEND_VL
Definition:RISCVISelLowering.h:330
llvm::RISCVISD::CLMUL
@ CLMUL
Definition:RISCVISelLowering.h:171
llvm::RISCVISD::FCLASS
@ FCLASS
Definition:RISCVISelLowering.h:149
llvm::RISCVISD::VECREDUCE_UMAX_VL
@ VECREDUCE_UMAX_VL
Definition:RISCVISelLowering.h:251
llvm::RISCVISD::CTLZ_VL
@ CTLZ_VL
Definition:RISCVISelLowering.h:287
llvm::RISCVISD::TAIL
@ TAIL
Definition:RISCVISelLowering.h:37
llvm::RISCVISD::VECREDUCE_XOR_VL
@ VECREDUCE_XOR_VL
Definition:RISCVISelLowering.h:257
llvm::RISCVISD::MUL_VL
@ MUL_VL
Definition:RISCVISelLowering.h:267
llvm::RISCVISD::VFSLIDE1UP_VL
@ VFSLIDE1UP_VL
Definition:RISCVISelLowering.h:228
llvm::RISCVISD::TRUNCATE_VECTOR_VL_USAT
@ TRUNCATE_VECTOR_VL_USAT
Definition:RISCVISelLowering.h:212
llvm::RISCVISD::OR_VL
@ OR_VL
Definition:RISCVISelLowering.h:268
llvm::RISCVISD::VFMV_V_F_VL
@ VFMV_V_F_VL
Definition:RISCVISelLowering.h:192
llvm::RISCVISD::SHA256SUM0
@ SHA256SUM0
Definition:RISCVISelLowering.h:172
llvm::RISCVISD::ORC_B
@ ORC_B
Definition:RISCVISelLowering.h:166
llvm::RISCVISD::RORW
@ RORW
Definition:RISCVISelLowering.h:104
llvm::RISCVISD::TH_SWD
@ TH_SWD
Definition:RISCVISelLowering.h:503
llvm::RISCVISD::FMV_X_SIGNEXTH
@ FMV_X_SIGNEXTH
Definition:RISCVISelLowering.h:126
llvm::RISCVISD::VFMV_S_F_VL
@ VFMV_S_F_VL
Definition:RISCVISelLowering.h:199
llvm::RISCVISD::CLMULR
@ CLMULR
Definition:RISCVISelLowering.h:171
llvm::RISCVISD::HI
@ HI
Definition:RISCVISelLowering.h:74
llvm::RISCVISD::VWSUBU_VL
@ VWSUBU_VL
Definition:RISCVISelLowering.h:353
llvm::RISCVISD::VECREDUCE_ADD_VL
@ VECREDUCE_ADD_VL
Definition:RISCVISelLowering.h:250
llvm::RISCVISD::VMV_X_S
@ VMV_X_S
Definition:RISCVISelLowering.h:195
llvm::RISCVISD::SHA256SUM1
@ SHA256SUM1
Definition:RISCVISelLowering.h:172
llvm::RISCVISD::VECREDUCE_OR_VL
@ VECREDUCE_OR_VL
Definition:RISCVISelLowering.h:256
llvm::RISCVISD::SF_VC_V_FVV_SE
@ SF_VC_V_FVV_SE
Definition:RISCVISelLowering.h:458
llvm::RISCVISD::VMERGE_VL
@ VMERGE_VL
Definition:RISCVISelLowering.h:378
llvm::RISCVISD::FIRST_STRICTFP_OPCODE
@ FIRST_STRICTFP_OPCODE
Definition:RISCVISelLowering.h:474
llvm::RISCVISD::UINT_TO_FP_VL
@ UINT_TO_FP_VL
Definition:RISCVISelLowering.h:326
llvm::RISCVISD::SF_VC_V_XVW_SE
@ SF_VC_V_XVW_SE
Definition:RISCVISelLowering.h:459
llvm::RISCVISD::VWADD_W_VL
@ VWADD_W_VL
Definition:RISCVISelLowering.h:354
llvm::RISCVISD::VFMADD_VL
@ VFMADD_VL
Definition:RISCVISelLowering.h:333
llvm::RISCVISD::VRGATHER_VX_VL
@ VRGATHER_VX_VL
Definition:RISCVISelLowering.h:391
llvm::RISCVISD::MOPR
@ MOPR
Definition:RISCVISelLowering.h:177
llvm::RISCVISD::VSLIDEDOWN_VL
@ VSLIDEDOWN_VL
Definition:RISCVISelLowering.h:218
llvm::RISCVISD::VMV_S_X_VL
@ VMV_S_X_VL
Definition:RISCVISelLowering.h:197
llvm::RISCVISD::CLZW
@ CLZW
Definition:RISCVISelLowering.h:107
llvm::RISCVISD::SINT_TO_FP_VL
@ SINT_TO_FP_VL
Definition:RISCVISelLowering.h:325
llvm::RISCVISD::BR_CC
@ BR_CC
Definition:RISCVISelLowering.h:45
llvm::RISCVISD::VID_VL
@ VID_VL
Definition:RISCVISelLowering.h:232
llvm::RISCVISD::SF_VC_V_XVV_SE
@ SF_VC_V_XVV_SE
Definition:RISCVISelLowering.h:455
llvm::RISCVISD::VMOR_VL
@ VMOR_VL
Definition:RISCVISelLowering.h:382
llvm::RISCVISD::SMAX_VL
@ SMAX_VL
Definition:RISCVISelLowering.h:281
llvm::RISCVISD::FCVT_W_RV64
@ FCVT_W_RV64
Definition:RISCVISelLowering.h:139
llvm::RISCVISD::VWADD_VL
@ VWADD_VL
Definition:RISCVISelLowering.h:350
llvm::RISCVISD::XOR_VL
@ XOR_VL
Definition:RISCVISelLowering.h:279
llvm::RISCVISD::SRAW
@ SRAW
Definition:RISCVISelLowering.h:93
llvm::RISCVISD::SF_VC_FVW_SE
@ SF_VC_FVW_SE
Definition:RISCVISelLowering.h:448
llvm::RISCVISD::SF_VC_V_FVW_SE
@ SF_VC_V_FVW_SE
Definition:RISCVISelLowering.h:462
llvm::RISCVISD::FCVT_WU_RV64
@ FCVT_WU_RV64
Definition:RISCVISelLowering.h:140
llvm::RISCVISD::SF_VC_V_VV_SE
@ SF_VC_V_VV_SE
Definition:RISCVISelLowering.h:453
llvm::RISCVISD::VWSUBU_W_VL
@ VWSUBU_W_VL
Definition:RISCVISelLowering.h:357
llvm::RISCVISD::SF_VC_V_I_SE
@ SF_VC_V_I_SE
Definition:RISCVISelLowering.h:450
llvm::RISCVISD::SF_VC_VVV_SE
@ SF_VC_VVV_SE
Definition:RISCVISelLowering.h:443
llvm::RISCVISD::STRICT_FCVT_W_RV64
@ STRICT_FCVT_W_RV64
Definition:RISCVISelLowering.h:475
llvm::RISCVISD::VFWSUB_W_VL
@ VFWSUB_W_VL
Definition:RISCVISelLowering.h:364
llvm::RISCVISD::BuildPairF64
@ BuildPairF64
Turns a pair of i32s into an f64.
Definition:RISCVISelLowering.h:63
llvm::RISCVISD::VWMUL_VL
@ VWMUL_VL
Definition:RISCVISelLowering.h:347
llvm::RISCVISD::VMXOR_VL
@ VMXOR_VL
Definition:RISCVISelLowering.h:383
llvm::RISCVISD::VECREDUCE_FADD_VL
@ VECREDUCE_FADD_VL
Definition:RISCVISelLowering.h:258
llvm::RISCVISD::SF_VC_V_IVV_SE
@ SF_VC_V_IVV_SE
Definition:RISCVISelLowering.h:456
llvm::RISCVISD::SF_VC_V_IVW_SE
@ SF_VC_V_IVW_SE
Definition:RISCVISelLowering.h:460
llvm::RISCVISD::BuildGPRPair
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
Definition:RISCVISelLowering.h:51
llvm::RISCVISD::VWMACCU_VL
@ VWMACCU_VL
Definition:RISCVISelLowering.h:369
llvm::RISCVISD::CZERO_NEZ
@ CZERO_NEZ
Definition:RISCVISelLowering.h:428
llvm::RISCVISD::LLA
@ LLA
Definition:RISCVISelLowering.h:77
llvm::RISCVISD::STRICT_FSETCCS_VL
@ STRICT_FSETCCS_VL
Definition:RISCVISelLowering.h:495
llvm::RISCVISD::SHL_VL
@ SHL_VL
Definition:RISCVISelLowering.h:270
llvm::RISCVISD::STRICT_VFMADD_VL
@ STRICT_VFMADD_VL
Definition:RISCVISelLowering.h:482
llvm::RISCVISD::SHA256SIG1
@ SHA256SIG1
Definition:RISCVISelLowering.h:172
llvm::RISCVISD::FSQRT_VL
@ FSQRT_VL
Definition:RISCVISelLowering.h:317
llvm::RISCVISD::FMV_H_X
@ FMV_H_X
Definition:RISCVISelLowering.h:124
llvm::RISCVISD::SF_VC_XV_SE
@ SF_VC_XV_SE
Definition:RISCVISelLowering.h:437
llvm::RISCVISD::VFWMUL_VL
@ VFWMUL_VL
Definition:RISCVISelLowering.h:360
llvm::RISCVISD::VWMACC_VL
@ VWMACC_VL
Definition:RISCVISelLowering.h:368
llvm::RISCVISD::VFIRST_VL
@ VFIRST_VL
Definition:RISCVISelLowering.h:403
llvm::RISCVISD::VWMULSU_VL
@ VWMULSU_VL
Definition:RISCVISelLowering.h:349
llvm::RISCVISD::VWSUB_VL
@ VWSUB_VL
Definition:RISCVISelLowering.h:352
llvm::RISCVISD::VFWNMSUB_VL
@ VFWNMSUB_VL
Definition:RISCVISelLowering.h:343
llvm::RISCVISD::VFCVT_RM_XU_F_VL
@ VFCVT_RM_XU_F_VL
Definition:RISCVISelLowering.h:324
llvm::RISCVISD::VECREDUCE_SMAX_VL
@ VECREDUCE_SMAX_VL
Definition:RISCVISelLowering.h:252
llvm::RISCVISD::SRA_VL
@ SRA_VL
Definition:RISCVISelLowering.h:272
llvm::RISCVISD::TH_LWUD
@ TH_LWUD
Definition:RISCVISelLowering.h:501
llvm::RISCVISD::SM3P1
@ SM3P1
Definition:RISCVISelLowering.h:174
llvm::RISCVISD::VECREDUCE_FMAX_VL
@ VECREDUCE_FMAX_VL
Definition:RISCVISelLowering.h:261
llvm::RISCVISD::VECREDUCE_AND_VL
@ VECREDUCE_AND_VL
Definition:RISCVISelLowering.h:255
llvm::RISCVISD::VFNCVT_ROD_VL
@ VFNCVT_ROD_VL
Definition:RISCVISelLowering.h:237
llvm::RISCVISD::LAST_VL_VECTOR_OP
@ LAST_VL_VECTOR_OP
Definition:RISCVISelLowering.h:405
llvm::RISCVISD::STRICT_VFROUND_NOEXCEPT_VL
@ STRICT_VFROUND_NOEXCEPT_VL
Definition:RISCVISelLowering.h:496
llvm::RISCVISD::VFWMSUB_VL
@ VFWMSUB_VL
Definition:RISCVISelLowering.h:342
llvm::RISCVISD::VMV_V_V_VL
@ VMV_V_V_VL
Definition:RISCVISelLowering.h:184
llvm::RISCVISD::FMAX
@ FMAX
Definition:RISCVISelLowering.h:153
llvm::RISCVISD::VSLIDE1UP_VL
@ VSLIDE1UP_VL
Definition:RISCVISelLowering.h:222
llvm::RISCVISD::VFMAX_VL
@ VFMAX_VL
Definition:RISCVISelLowering.h:312
llvm::RISCVISD::STRICT_VFNMADD_VL
@ STRICT_VFNMADD_VL
Definition:RISCVISelLowering.h:483
llvm::RISCVISD::VFCVT_RM_X_F_VL
@ VFCVT_RM_X_F_VL
Definition:RISCVISelLowering.h:323
llvm::RISCVISD::VCPOP_VL
@ VCPOP_VL
Definition:RISCVISelLowering.h:400
llvm::RISCVISD::SF_VC_FV_SE
@ SF_VC_FV_SE
Definition:RISCVISelLowering.h:440
llvm::RISCVISD::CALL
@ CALL
Definition:RISCVISelLowering.h:36
llvm::RISCVISD::READ_VLENB
@ READ_VLENB
Definition:RISCVISelLowering.h:408
llvm::RISCVISD::FCVT_X
@ FCVT_X
Definition:RISCVISelLowering.h:133
llvm::RISCVISD::READ_CSR
@ READ_CSR
Definition:RISCVISelLowering.h:413
llvm::RISCVISD::VWMACCSU_VL
@ VWMACCSU_VL
Definition:RISCVISelLowering.h:370
llvm::RISCVISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition:RISCVISelLowering.h:32
llvm::RISCVISD::AND_VL
@ AND_VL
Definition:RISCVISelLowering.h:266
llvm::RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL
@ SPLAT_VECTOR_SPLIT_I64_VL
Definition:RISCVISelLowering.h:203
llvm::RISCVISD::SM4KS
@ SM4KS
Definition:RISCVISelLowering.h:173
llvm::RISCVISD::SplitGPRPair
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
Definition:RISCVISelLowering.h:57
llvm::RISCVISD::VFSLIDE1DOWN_VL
@ VFSLIDE1DOWN_VL
Definition:RISCVISelLowering.h:229
llvm::RISCVISD::READ_COUNTER_WIDE
@ READ_COUNTER_WIDE
Definition:RISCVISelLowering.h:161
llvm::RISCVISD::TRUNCATE_VECTOR_VL
@ TRUNCATE_VECTOR_VL
Definition:RISCVISelLowering.h:206
llvm::RISCVISD::MOPRR
@ MOPRR
Definition:RISCVISelLowering.h:177
llvm::RISCVISD::SF_VC_V_VVV_SE
@ SF_VC_V_VVV_SE
Definition:RISCVISelLowering.h:457
llvm::RISCVISD::TH_LWD
@ TH_LWD
Definition:RISCVISelLowering.h:500
llvm::RISCVISD::VZEXT_VL
@ VZEXT_VL
Definition:RISCVISelLowering.h:397
llvm::RISCVISD::ADD_TPREL
@ ADD_TPREL
Definition:RISCVISelLowering.h:80
llvm::RISCVISD::TRUNCATE_VECTOR_VL_SSAT
@ TRUNCATE_VECTOR_VL_SSAT
Definition:RISCVISelLowering.h:211
llvm::RISCVISD::STRICT_FADD_VL
@ STRICT_FADD_VL
Definition:RISCVISelLowering.h:477
llvm::RISCVISD::SF_VC_XVW_SE
@ SF_VC_XVW_SE
Definition:RISCVISelLowering.h:445
llvm::RISCVISD::SETCC_VL
@ SETCC_VL
Definition:RISCVISelLowering.h:374
llvm::RISCVISD::FIRST_VL_VECTOR_OP
@ FIRST_VL_VECTOR_OP
Definition:RISCVISelLowering.h:180
llvm::RISCVISD::UNZIP
@ UNZIP
Definition:RISCVISelLowering.h:168
llvm::RISCVISD::STRICT_VFCVT_RTZ_X_F_VL
@ STRICT_VFCVT_RTZ_X_F_VL
Definition:RISCVISelLowering.h:492
llvm::RISCVISD::FLI
@ FLI
Definition:RISCVISelLowering.h:156
llvm::RISCVISD::STRICT_FDIV_VL
@ STRICT_FDIV_VL
Definition:RISCVISelLowering.h:480
llvm::RISCVISD::UMIN_VL
@ UMIN_VL
Definition:RISCVISelLowering.h:282
llvm::RISCVISD::FSUB_VL
@ FSUB_VL
Definition:RISCVISelLowering.h:308
llvm::RISCVISD::VFCVT_RTZ_XU_F_VL
@ VFCVT_RTZ_XU_F_VL
Definition:RISCVISelLowering.h:321
llvm::RISCVISD::FMV_X_ANYEXTH
@ FMV_X_ANYEXTH
Definition:RISCVISelLowering.h:125
llvm::RISCVISD::VFCVT_RM_F_XU_VL
@ VFCVT_RM_F_XU_VL
Definition:RISCVISelLowering.h:328
llvm::RISCVISD::STRICT_VFCVT_RM_X_F_VL
@ STRICT_VFCVT_RM_X_F_VL
Definition:RISCVISelLowering.h:491
llvm::RISCVISD::ZIP
@ ZIP
Definition:RISCVISelLowering.h:167
llvm::RISCVISD::VFNMSUB_VL
@ VFNMSUB_VL
Definition:RISCVISelLowering.h:336
llvm::RISCVISD::LAST_STRICTFP_OPCODE
@ LAST_STRICTFP_OPCODE
Definition:RISCVISelLowering.h:497
llvm::RISCVISD::VFMSUB_VL
@ VFMSUB_VL
Definition:RISCVISelLowering.h:335
llvm::RISCVISD::SF_VC_VV_SE
@ SF_VC_VV_SE
Definition:RISCVISelLowering.h:439
llvm::RISCVISD::TH_SDD
@ TH_SDD
Definition:RISCVISelLowering.h:504
llvm::RISCVISD::VFWADD_VL
@ VFWADD_VL
Definition:RISCVISelLowering.h:361
llvm::RISCVISD::SF_VC_V_IV_SE
@ SF_VC_V_IV_SE
Definition:RISCVISelLowering.h:452
llvm::RISCVISD::SF_VC_VVW_SE
@ SF_VC_VVW_SE
Definition:RISCVISelLowering.h:447
llvm::RISCVISD::SRL_VL
@ SRL_VL
Definition:RISCVISelLowering.h:273
llvm::RISCVISD::SF_VC_IVW_SE
@ SF_VC_IVW_SE
Definition:RISCVISelLowering.h:446
llvm::RISCVISD::FADD_VL
@ FADD_VL
Definition:RISCVISelLowering.h:307
llvm::RISCVISD::STRICT_FP_EXTEND_VL
@ STRICT_FP_EXTEND_VL
Definition:RISCVISelLowering.h:487
llvm::RISCVISD::STRICT_FP_ROUND_VL
@ STRICT_FP_ROUND_VL
Definition:RISCVISelLowering.h:486
llvm::RISCVISD::VECREDUCE_UMIN_VL
@ VECREDUCE_UMIN_VL
Definition:RISCVISelLowering.h:253
llvm::RISCVISD::SF_VC_XVV_SE
@ SF_VC_XVV_SE
Definition:RISCVISelLowering.h:441
llvm::RISCVISD::FCOPYSIGN_VL
@ FCOPYSIGN_VL
Definition:RISCVISelLowering.h:319
llvm::RISCVISD::VRGATHER_VV_VL
@ VRGATHER_VV_VL
Definition:RISCVISelLowering.h:392
llvm::RISCVISD::DIVUW
@ DIVUW
Definition:RISCVISelLowering.h:99
llvm::RISCVISD::FMIN
@ FMIN
Definition:RISCVISelLowering.h:153
llvm::RISCVISD::VFWMADD_VL
@ VFWMADD_VL
Definition:RISCVISelLowering.h:340
llvm::RISCVISD::VRGATHEREI16_VV_VL
@ VRGATHEREI16_VV_VL
Definition:RISCVISelLowering.h:393
llvm::RISCVISD::VWADDU_W_VL
@ VWADDU_W_VL
Definition:RISCVISelLowering.h:355
llvm::RISCVISD::STRICT_VFMSUB_VL
@ STRICT_VFMSUB_VL
Definition:RISCVISelLowering.h:484
llvm::RISCVISD::SM3P0
@ SM3P0
Definition:RISCVISelLowering.h:174
llvm::RISCVISD::VMCLR_VL
@ VMCLR_VL
Definition:RISCVISelLowering.h:386
llvm::RISCVISD::VFROUND_NOEXCEPT_VL
@ VFROUND_NOEXCEPT_VL
Definition:RISCVISelLowering.h:322
llvm::RISCVISD::MULHSU
@ MULHSU
Definition:RISCVISelLowering.h:83
llvm::RISCVISD::SF_VC_V_XV_SE
@ SF_VC_V_XV_SE
Definition:RISCVISelLowering.h:451
llvm::RISCVISD::REMUW
@ REMUW
Definition:RISCVISelLowering.h:100
llvm::RISCVISD::MRET_GLUE
@ MRET_GLUE
Definition:RISCVISelLowering.h:35
llvm::RISCVISD::SW_GUARDED_CALL
@ SW_GUARDED_CALL
Definition:RISCVISelLowering.h:434
llvm::RISCVISD::CLMULH
@ CLMULH
Definition:RISCVISelLowering.h:171
llvm::RISCVISD::SRET_GLUE
@ SRET_GLUE
Definition:RISCVISelLowering.h:34
llvm::RISCVISD::VFWSUB_VL
@ VFWSUB_VL
Definition:RISCVISelLowering.h:362
llvm::RISCVISD::ABSW
@ ABSW
Definition:RISCVISelLowering.h:111
llvm::RISCVISD::VFCVT_RTZ_X_F_VL
@ VFCVT_RTZ_X_F_VL
Definition:RISCVISelLowering.h:320
llvm::RISCVISD::ADD_VL
@ ADD_VL
Definition:RISCVISelLowering.h:265
llvm::RISCVISD::PROBED_ALLOCA
@ PROBED_ALLOCA
Definition:RISCVISelLowering.h:466
llvm::RISCVISD::VECREDUCE_SMIN_VL
@ VECREDUCE_SMIN_VL
Definition:RISCVISelLowering.h:254
llvm::RISCVISD::TUPLE_INSERT
@ TUPLE_INSERT
Definition:RISCVISelLowering.h:469
llvm::RISCVISD::VECREDUCE_FMIN_VL
@ VECREDUCE_FMIN_VL
Definition:RISCVISelLowering.h:260
llvm::RISCVISD::VFMIN_VL
@ VFMIN_VL
Definition:RISCVISelLowering.h:311
llvm::RISCVISD::SF_VC_IV_SE
@ SF_VC_IV_SE
Definition:RISCVISelLowering.h:438
llvm::RISCVISD::FCVT_XU
@ FCVT_XU
Definition:RISCVISelLowering.h:134
llvm::RISCVISD::VFNMADD_VL
@ VFNMADD_VL
Definition:RISCVISelLowering.h:334
llvm::RISCVISD::BREV8
@ BREV8
Definition:RISCVISelLowering.h:165
llvm::RISCVISD::SF_VC_FVV_SE
@ SF_VC_FVV_SE
Definition:RISCVISelLowering.h:444
llvm::RISCVISD::VECREDUCE_SEQ_FADD_VL
@ VECREDUCE_SEQ_FADD_VL
Definition:RISCVISelLowering.h:259
llvm::RISCVISD::STRICT_FCVT_WU_RV64
@ STRICT_FCVT_WU_RV64
Definition:RISCVISelLowering.h:476
llvm::RISCVISD::SHL_ADD
@ SHL_ADD
Definition:RISCVISelLowering.h:88
llvm::RISCVISD::FMV_W_X_RV64
@ FMV_W_X_RV64
Definition:RISCVISelLowering.h:127
llvm::RISCVISD::FNEG_VL
@ FNEG_VL
Definition:RISCVISelLowering.h:315
llvm::RISCVISD::TUPLE_EXTRACT
@ TUPLE_EXTRACT
Definition:RISCVISelLowering.h:470
llvm::RISCVISD::STRICT_VFNCVT_ROD_VL
@ STRICT_VFNCVT_ROD_VL
Definition:RISCVISelLowering.h:488
llvm::RISCVISD::ADD_LO
@ ADD_LO
Definition:RISCVISelLowering.h:72
llvm::RISCVISD::VFWNMADD_VL
@ VFWNMADD_VL
Definition:RISCVISelLowering.h:341
llvm::RISCVISD::SF_VC_IVV_SE
@ SF_VC_IVV_SE
Definition:RISCVISelLowering.h:442
llvm::RISCVISD::TH_LDD
@ TH_LDD
Definition:RISCVISelLowering.h:502
llvm::RISCVISD::VFWADD_W_VL
@ VFWADD_W_VL
Definition:RISCVISelLowering.h:363
llvm::RISCVISD::FMV_X_ANYEXTW_RV64
@ FMV_X_ANYEXTW_RV64
Definition:RISCVISelLowering.h:128
llvm::RISCVISD::VSLIDEUP_VL
@ VSLIDEUP_VL
Definition:RISCVISelLowering.h:217
llvm::RISCVISD::SW_GUARDED_TAIL
@ SW_GUARDED_TAIL
Definition:RISCVISelLowering.h:435
llvm::RISCVISD::FP_ROUND_VL
@ FP_ROUND_VL
Definition:RISCVISelLowering.h:329
llvm::RISCVLoadFPImm::getLoadFPImm
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
Definition:RISCVBaseInfo.cpp:164
llvm::RISCVMatInt::generateInstSeq
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
Definition:RISCVMatInt.cpp:227
llvm::RISCVMatInt::getIntMatCost
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
Definition:RISCVMatInt.cpp:501
llvm::RISCVMatInt::generateTwoRegInstSeq
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
Definition:RISCVMatInt.cpp:468
llvm::RISCVVIntrinsicsTable
Definition:RISCVISelLowering.cpp:22846
llvm::RISCVVType::decodeVSEW
static unsigned decodeVSEW(unsigned VSEW)
Definition:RISCVTargetParser.h:115
llvm::RISCVVType::decodeVLMUL
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
Definition:RISCVTargetParser.cpp:182
llvm::RISCVVType::encodeLMUL
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
Definition:RISCVTargetParser.h:109
llvm::RISCVVType::encodeSEW
static unsigned encodeSEW(unsigned SEW)
Definition:RISCVTargetParser.h:120
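A short sketch (assumed example) of the SEW/LMUL encode and decode helpers listed above; vtypeHelpers is a hypothetical function name.
#include "llvm/TargetParser/RISCVTargetParser.h"
#include <cassert>
using namespace llvm;

void vtypeHelpers() {
  // SEW is stored as log2(SEW) - 3, so SEW=32 corresponds to encoding 2.
  assert(RISCVVType::encodeSEW(32) == 2);
  assert(RISCVVType::decodeVSEW(2) == 32);

  // LMUL=1/2 is a fractional register group: decodeVLMUL returns the
  // denominator together with the Fractional flag.
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(RISCVII::LMUL_F2);
  assert(LMul == 2 && Fractional);
}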
llvm::RISCV::FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Negative_Zero
Definition:RISCVInstrInfo.h:363
llvm::RISCV::FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Subnormal
Definition:RISCVInstrInfo.h:365
llvm::RISCV::FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Positive_Normal
Definition:RISCVInstrInfo.h:366
llvm::RISCV::FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Subnormal
Definition:RISCVInstrInfo.h:362
llvm::RISCV::FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Negative_Normal
Definition:RISCVInstrInfo.h:361
llvm::RISCV::FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Positive_Infinity
Definition:RISCVInstrInfo.h:367
llvm::RISCV::getNamedOperandIdx
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
llvm::RISCV::FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
Definition:RISCVInstrInfo.h:360
llvm::RISCV::FPMASK_Quiet_NaN
static constexpr unsigned FPMASK_Quiet_NaN
Definition:RISCVInstrInfo.h:369
llvm::RISCV::getArgGPRs
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
Definition:RISCVCallingConv.cpp:126
llvm::RISCV::FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Signaling_NaN
Definition:RISCVInstrInfo.h:368
llvm::RISCV::FPMASK_Positive_Zero
static constexpr unsigned FPMASK_Positive_Zero
Definition:RISCVInstrInfo.h:364
llvm::RISCV::RVVBitsPerBlock
static constexpr unsigned RVVBitsPerBlock
Definition:RISCVTargetParser.h:51
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition:RuntimeLibcalls.h:33
llvm::RTLIB::getFPTOUINT
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition:TargetLoweringBase.cpp:251
llvm::RTLIB::getFPTOSINT
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition:TargetLoweringBase.cpp:202
llvm::RTLIB::getFPROUND
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition:TargetLoweringBase.cpp:155
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition:MachineInstrBuilder.h:50
llvm::Reloc::Model
Model
Definition:CodeGen.h:25
llvm::SPII::Store
@ Store
Definition:SparcInstrInfo.h:33
llvm::SPII::Load
@ Load
Definition:SparcInstrInfo.h:32
llvm::Sched::Source
@ Source
Definition:TargetLowering.h:102
llvm::SyncScope::SingleThread
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition:LLVMContext.h:54
llvm::SyncScope::System
@ System
Synchronized with respect to all concurrently executing threads.
Definition:LLVMContext.h:57
llvm::TLSModel::Model
Model
Definition:CodeGen.h:45
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition:CodeGen.h:47
llvm::TLSModel::InitialExec
@ InitialExec
Definition:CodeGen.h:48
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition:CodeGen.h:46
llvm::TLSModel::LocalExec
@ LocalExec
Definition:CodeGen.h:49
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition:X86DisassemblerDecoder.h:621
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::bitc::NoNaNs
@ NoNaNs
Definition:LLVMBitCodes.h:527
llvm::cfg::UpdateKind::Insert
@ Insert
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm::codeview::CompileSym3Flags::Exp
@ Exp
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::dwarf::Index
Index
Definition:Dwarf.h:882
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::sampleprof::Base
@ Base
Definition:Discriminator.h:58
llvm::support::endian::read32le
uint32_t read32le(const void *P)
Definition:Endian.h:425
llvm::tgtok::TrueVal
@ TrueVal
Definition:TGLexer.h:58
llvm::tgtok::FalseVal
@ FalseVal
Definition:TGLexer.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::next_nodbg
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
Definition:MachineBasicBlock.h:1440
llvm::CC_RISCV_GHC
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Definition:RISCVCallingConv.cpp:727
llvm::Offset
@ Offset
Definition:DWP.cpp:480
llvm::MONontemporalBit1
static const MachineMemOperand::Flags MONontemporalBit1
Definition:RISCVInstrInfo.h:32
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition:MachineInstrBuilder.h:373
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition:SelectionDAG.cpp:12205
llvm::Depth
@ Depth
Definition:SIMachineScheduler.h:36
llvm::LoopIdiomVectorizeStyle::Masked
@ Masked
llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition:STLExtras.h:2448
llvm::createRISCVMCCodeEmitter
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
Definition:RISCVMCCodeEmitter.cpp:105
llvm::bit_width
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition:bit.h:317
llvm::MONontemporalBit0
static const MachineMemOperand::Flags MONontemporalBit0
Definition:RISCVInstrInfo.h:30
llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is Skew mod Align.
Definition:MathExtras.h:556
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition:MathExtras.h:297
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition:VectorUtils.cpp:431
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition:VectorUtils.cpp:312
llvm::isNullOrNullSplat
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition:Utils.cpp:1547
llvm::Log2_64
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition:MathExtras.h:347
llvm::PowerOf2Ceil
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition:MathExtras.h:395
llvm::countr_zero
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition:bit.h:215
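A minimal, self-contained sketch (assumed example, not from this file) exercising the MathExtras and bit helpers referenced above; mathHelperExamples is a hypothetical function name.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

void mathHelperExamples() {
  assert(isPowerOf2_64(64));        // 64 == 1 << 6
  assert(Log2_64(64) == 6);         // floor(log2(64))
  assert(PowerOf2Ceil(33) == 64);   // next power of two >= 33
  assert(alignDown(70, 16) == 64);  // largest multiple of 16 <= 70
  assert(countr_zero(0x50u) == 4);  // trailing zeros of 0b1010000
}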
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition:AtomicOrdering.h:133
llvm::getOffset
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Definition:RuntimeDyld.cpp:172
llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition:STLExtras.h:1952
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1746
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition:MathExtras.h:341
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition:MathExtras.h:292
llvm::ComplexDeinterleavingOperation::Splat
@ Splat
llvm::fcNegSubnormal
@ fcNegSubnormal
Definition:FloatingPointMode.h:246
llvm::fcPosNormal
@ fcPosNormal
Definition:FloatingPointMode.h:250
llvm::fcQNan
@ fcQNan
Definition:FloatingPointMode.h:243
llvm::fcNegZero
@ fcNegZero
Definition:FloatingPointMode.h:247
llvm::fcNegInf
@ fcNegInf
Definition:FloatingPointMode.h:244
llvm::fcPosZero
@ fcPosZero
Definition:FloatingPointMode.h:248
llvm::fcSNan
@ fcSNan
Definition:FloatingPointMode.h:242
llvm::fcNegNormal
@ fcNegNormal
Definition:FloatingPointMode.h:245
llvm::fcPosSubnormal
@ fcPosSubnormal
Definition:FloatingPointMode.h:249
llvm::fcPosInf
@ fcPosInf
Definition:FloatingPointMode.h:251
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition:Debug.cpp:163
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition:Error.cpp:167
llvm::CC_RISCV_FastCC
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
Definition:RISCVCallingConv.cpp:606
llvm::isMask_64
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition:MathExtras.h:274
llvm::CaptureComponents::Address
@ Address
llvm::isOneOrOneSplat
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
Definition:SelectionDAG.cpp:12414
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition:raw_ostream.cpp:907
llvm::PackElem::Hi
@ Hi
llvm::PackElem::Lo
@ Lo
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition:AtomicOrdering.h:56
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicOrdering::Release
@ Release
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition:MathExtras.h:404
llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
llvm::CombineLevel
CombineLevel
Definition:DAGCombine.h:15
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition:VectorUtils.cpp:410
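A minimal sketch (assumed example) of the shuffle-mask scaling helpers widenShuffleMaskElts and narrowShuffleMaskElts described above; shuffleMaskScaling is a hypothetical function name.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void shuffleMaskScaling() {
  SmallVector<int, 8> Narrow;
  // <1, 0> over wide elements becomes <2, 3, 0, 1> over elements half as wide.
  narrowShuffleMaskElts(/*Scale=*/2, {1, 0}, Narrow);

  SmallVector<int, 4> Wide;
  // Widening only succeeds when every group of Scale indices is contiguous
  // and aligned; here it recovers <1, 0>.
  bool Ok = widenShuffleMaskElts(/*Scale=*/2, Narrow, Wide);
  (void)Ok;
}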
llvm::RecurKind::Xor
@ Xor
Bitwise or logical XOR of integers.
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::RecurKind::SMin
@ SMin
Signed integer min implemented in terms of select(cmp()).
llvm::getKillRegState
unsigned getKillRegState(bool B)
Definition:MachineInstrBuilder.h:555
llvm::Op
DWARFExpression::Operation Op
Definition:DWARFExpression.cpp:22
llvm::RoundingMode
RoundingMode
Rounding mode.
Definition:FloatingPointMode.h:37
llvm::RoundingMode::TowardZero
@ TowardZero
roundTowardZero.
llvm::RoundingMode::NearestTiesToEven
@ NearestTiesToEven
roundTiesToEven.
llvm::RoundingMode::TowardPositive
@ TowardPositive
roundTowardPositive.
llvm::RoundingMode::NearestTiesToAway
@ NearestTiesToAway
roundTiesToAway.
llvm::RoundingMode::TowardNegative
@ TowardNegative
roundTowardNegative.
llvm::isConstOrConstSplat
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
Definition:SelectionDAG.cpp:12331
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition:AtomicOrdering.h:129
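A small sketch (assumed example) of the atomic-ordering predicates isAcquireOrStronger and isReleaseOrStronger listed above; orderingChecks is a hypothetical function name.
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
using namespace llvm;

void orderingChecks() {
  assert(isAcquireOrStronger(AtomicOrdering::Acquire));
  assert(isAcquireOrStronger(AtomicOrdering::SequentiallyConsistent));
  assert(!isReleaseOrStronger(AtomicOrdering::Monotonic));
}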
llvm::BitWidth
constexpr unsigned BitWidth
Definition:BitmaskEnum.h:217
llvm::PseudoProbeReservedId::Last
@ Last
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition:STLExtras.h:1945
llvm::RISCVCCAssignFn
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
Definition:RISCVCallingConv.h:21
llvm::isOneConstant
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Definition:SelectionDAG.cpp:12224
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition:STLExtras.h:1903
llvm::processShuffleMasks
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition:VectorUtils.cpp:556
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition:Alignment.h:208
llvm::Cost
InstructionCost Cost
Definition:FunctionSpecialization.h:102
llvm::CC_RISCV
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
Definition:RISCVCallingConv.cpp:324
llvm::Data
@ Data
Definition:SIMachineScheduler.h:55
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition:VectorUtils.cpp:1040
llvm::fltNanEncoding::AllOnes
@ AllOnes
llvm::isNeutralConstant
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
Definition:SelectionDAG.cpp:12234
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition:SelectionDAG.cpp:12219
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860
raw_ostream.h
N
#define N
NC
#define NC
Definition:regutils.h:42
InsertionPoint
Definition:CFIFixup.cpp:129
RegInfo
Definition:AMDGPUAsmParser.cpp:2770
VIDSequence
Definition:RISCVISelLowering.cpp:3373
VIDSequence::StepNumerator
int64_t StepNumerator
Definition:RISCVISelLowering.cpp:3374
VIDSequence::Addend
int64_t Addend
Definition:RISCVISelLowering.cpp:3376
VIDSequence::StepDenominator
unsigned StepDenominator
Definition:RISCVISelLowering.cpp:3375
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition:APFloat.h:302
llvm::APFloatBase::semanticsPrecision
static unsigned int semanticsPrecision(const fltSemantics &)
Definition:APFloat.cpp:315
llvm::APFloatBase::opInvalidOp
@ opInvalidOp
Definition:APFloat.h:320
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition:Alignment.h:85
llvm::EVT
Extended Value Type.
Definition:ValueTypes.h:35
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition:ValueTypes.h:94
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition:ValueTypes.h:390
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition:ValueTypes.h:137
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition:ValueTypes.h:74
llvm::EVT::getScalarStoreSize
uint64_t getScalarStoreSize() const
Definition:ValueTypes.h:397
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition:ValueTypes.h:279
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition:ValueTypes.h:295
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition:ValueTypes.h:147
llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition:ValueTypes.h:345
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition:ValueTypes.h:368
llvm::EVT::isByteSized
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition:ValueTypes.h:238
llvm::EVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition:ValueTypes.h:354
llvm::EVT::getRISCVVectorTupleNumFields
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition:ValueTypes.h:359
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition:ValueTypes.h:380
llvm::EVT::getHalfSizedIntegerVT
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition:ValueTypes.h:425
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition:ValueTypes.h:311
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition:ValueTypes.h:65
llvm::EVT::isRISCVVectorTuple
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple type.
Definition:ValueTypes.h:179
llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition:ValueTypes.h:376
llvm::EVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition:ValueTypes.h:181
llvm::EVT::getRoundIntegerType
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition:ValueTypes.h:414
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition:ValueTypes.h:168
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition:ValueTypes.h:318
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition:ValueTypes.h:287
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition:ValueTypes.cpp:210
llvm::EVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition:ValueTypes.h:174
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition:ValueTypes.h:323
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition:ValueTypes.h:157
llvm::EVT::changeVectorElementType
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition:ValueTypes.h:102
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition:ValueTypes.h:331
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition:ValueTypes.h:303
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition:ValueTypes.h:152
llvm::ISD::ArgFlagsTy
Definition:TargetCallingConv.h:27
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition:TargetCallingConv.h:199
llvm::Inverse
Definition:GraphTraits.h:123
llvm::KnownBits
Definition:KnownBits.h:23
llvm::KnownBits::urem
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition:KnownBits.cpp:1049
llvm::KnownBits::isUnknown
bool isUnknown() const
Returns true if we don't know any bits.
Definition:KnownBits.h:65
llvm::KnownBits::countMaxTrailingZeros
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition:KnownBits.h:266
llvm::KnownBits::trunc
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition:KnownBits.h:153
llvm::KnownBits::getBitWidth
unsigned getBitWidth() const
Get the bit width of this value.
Definition:KnownBits.h:43
llvm::KnownBits::zext
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition:KnownBits.h:164
llvm::KnownBits::resetAll
void resetAll()
Resets the known state of all bits.
Definition:KnownBits.h:73
llvm::KnownBits::countMaxActiveBits
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition:KnownBits.h:288
llvm::KnownBits::intersectWith
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition:KnownBits.h:303
llvm::KnownBits::sext
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition:KnownBits.h:172
llvm::KnownBits::udiv
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition:KnownBits.cpp:1009
llvm::KnownBits::countMaxLeadingZeros
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition:KnownBits.h:272
llvm::KnownBits::One
APInt One
Definition:KnownBits.h:25
llvm::KnownBits::Zero
APInt Zero
Definition:KnownBits.h:24
llvm::KnownBits::shl
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition:KnownBits.cpp:285
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition:MachineMemOperand.h:41
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition:MachineOperand.cpp:1085
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition:MachineOperand.cpp:1066
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition:MachineMemOperand.h:81
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition:MachineOperand.cpp:1081
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition:MachineOperand.cpp:1072
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition:Alignment.h:117
llvm::MemOp
Definition:TargetLowering.h:115
llvm::RISCVRegisterInfo
Definition:RISCVRegisterInfo.h:56
llvm::RISCVRegisterInfo::getReservedRegs
BitVector getReservedRegs(const MachineFunction &MF) const override
Definition:RISCVRegisterInfo.cpp:101
llvm::RISCVRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition:RISCVRegisterInfo.cpp:715
llvm::RISCVVIntrinsicsTable::RISCVVIntrinsicInfo
Definition:RISCVISelLowering.h:1085
llvm::RISCVVInversePseudosTable::PseudoInfo
Definition:RISCVMCTargetDesc.h:42
llvm::RISCV::RISCVMaskedPseudoInfo
Definition:RISCVInstrInfo.h:386
llvm::RISCV::RISCVMaskedPseudoInfo::MaskedPseudo
uint16_t MaskedPseudo
Definition:RISCVInstrInfo.h:387
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition:SelectionDAGNodes.h:381
llvm::SDNodeFlags::Disjoint
@ Disjoint
Definition:SelectionDAGNodes.h:398
llvm::SDNodeFlags::hasDisjoint
bool hasDisjoint() const
Definition:SelectionDAGNodes.h:461
llvm::SDVTList
This represents a list of ValueTypes that has been interned by a SelectionDAG.
Definition:SelectionDAGNodes.h:79
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Definition:TargetLowering.h:2816
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition:TargetLowering.h:2818
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition:TargetLowering.h:2817
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition:TargetLowering.h:2819
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition:TargetLowering.h:2820
llvm::TargetLoweringBase::AddrMode::ScalableOffset
int64_t ScalableOffset
Definition:TargetLowering.h:2821
llvm::TargetLoweringBase::IntrinsicInfo
Definition:TargetLowering.h:1202
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition:TargetLowering.h:4529
llvm::TargetLowering::CallLoweringInfo::IsTailCall
bool IsTailCall
Definition:TargetLowering.h:4545
llvm::TargetLowering::CallLoweringInfo::Callee
SDValue Callee
Definition:TargetLowering.h:4552
llvm::TargetLowering::CallLoweringInfo::DL
SDLoc DL
Definition:TargetLowering.h:4555
llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition:TargetLowering.h:4534
llvm::TargetLowering::CallLoweringInfo::Ins
SmallVector< ISD::InputArg, 32 > Ins
Definition:TargetLowering.h:4559
llvm::TargetLowering::CallLoweringInfo::CFIType
const ConstantInt * CFIType
Definition:TargetLowering.h:4561
llvm::TargetLowering::CallLoweringInfo::Chain
SDValue Chain
Definition:TargetLowering.h:4530
llvm::TargetLowering::CallLoweringInfo::NoMerge
bool NoMerge
Definition:TargetLowering.h:4541
llvm::TargetLowering::CallLoweringInfo::CB
const CallBase * CB
Definition:TargetLowering.h:4556
llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition:TargetLowering.h:4557
llvm::TargetLowering::CallLoweringInfo::OutVals
SmallVector< SDValue, 32 > OutVals
Definition:TargetLowering.h:4558
llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition:TargetLowering.h:4551
llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition:TargetLowering.h:4554
llvm::TargetLowering::DAGCombinerInfo
Definition:TargetLowering.h:4228
llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition:TargetLowering.h:4241
llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
void AddToWorklist(SDNode *N)
Definition:DAGCombiner.cpp:916
llvm::TargetLowering::DAGCombinerInfo::isCalledByLegalizer
bool isCalledByLegalizer() const
Definition:TargetLowering.h:4243
llvm::TargetLowering::DAGCombinerInfo::recursivelyDeleteUnusedNodes
bool recursivelyDeleteUnusedNodes(SDNode *N)
Definition:DAGCombiner.cpp:936
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition:TargetLowering.h:4239
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition:TargetLowering.h:4234
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition:DAGCombiner.cpp:921
llvm::TargetLowering::MakeLibCallOptions
This structure is used to pass arguments to the makeLibCall function.
Definition:TargetLowering.h:4714
llvm::TargetLowering::MakeLibCallOptions::setTypeListBeforeSoften
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
Definition:TargetLowering.h:4749
llvm::TargetLowering::TargetLoweringOpt
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Definition:TargetLowering.h:3946
llvm::TargetLowering::TargetLoweringOpt::DAG
SelectionDAG & DAG
Definition:TargetLowering.h:3947
llvm::TargetLowering::TargetLoweringOpt::CombineTo
bool CombineTo(SDValue O, SDValue N)
Definition:TargetLowering.h:3960
llvm::TargetLowering::TargetLoweringOpt::LegalOps
bool LegalOps
Definition:TargetLowering.h:3949
llvm::cl::desc
Definition:CommandLine.h:409
llvm::fltSemantics
Definition:APFloat.cpp:103

Generated on Thu Jul 17 2025 15:27:50 for LLVM by doxygen 1.9.6