//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscv-lower"

    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),

    cl::desc("Allow the formation of VW_W operations (e.g., "
             "VWADD_W) with splat constants"),

    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),

    cl::desc("Give the maximum number of instructions that we will "
             "use for creating a floating-point immediate value"),
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
               "doesn't support the F instruction set extension (ignoring "

      !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
               "doesn't support the D instruction set extension (ignoring "

  // Set up the register classes.

  if (Subtarget.hasStdExtZfhmin())
  if (Subtarget.hasStdExtZfbfmin())
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())
  if (Subtarget.hasStdExtZhinxmin())
  if (Subtarget.hasStdExtZfinx())
  if (Subtarget.hasStdExtZdinx()) {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1,  MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};

      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};

      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};

      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};

      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};

      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

      MVT::riscv_nxv1i8x2,  MVT::riscv_nxv1i8x3,  MVT::riscv_nxv1i8x4,
      MVT::riscv_nxv1i8x5,  MVT::riscv_nxv1i8x6,  MVT::riscv_nxv1i8x7,
      MVT::riscv_nxv1i8x8,  MVT::riscv_nxv2i8x2,  MVT::riscv_nxv2i8x3,
      MVT::riscv_nxv2i8x4,  MVT::riscv_nxv2i8x5,  MVT::riscv_nxv2i8x6,
      MVT::riscv_nxv2i8x7,  MVT::riscv_nxv2i8x8,  MVT::riscv_nxv4i8x2,
      MVT::riscv_nxv4i8x3,  MVT::riscv_nxv4i8x4,  MVT::riscv_nxv4i8x5,
      MVT::riscv_nxv4i8x6,  MVT::riscv_nxv4i8x7,  MVT::riscv_nxv4i8x8,
      MVT::riscv_nxv8i8x2,  MVT::riscv_nxv8i8x3,  MVT::riscv_nxv8i8x4,
      MVT::riscv_nxv8i8x5,  MVT::riscv_nxv8i8x6,  MVT::riscv_nxv8i8x7,
      MVT::riscv_nxv8i8x8,  MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
      MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
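      // Note (added for clarity; not in the original source): each
      // MVT::riscv_nxv<N>i8x<NF> entry above is assumed here to back the
      // "riscv.vector.tuple" target extension type used later in this file,
      // i.e. a tuple of NF vector register groups as produced/consumed by
      // segment loads and stores; for example, MVT::riscv_nxv8i8x4 would be a
      // 4-field tuple whose per-field storage is nxv8i8 worth of bytes.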
  auto addRegClassForRVV = [this](MVT VT) {
    // Disable the smallest fractional LMUL types if ELEN is less than
    // RVVBitsPerBlock.
    if (VT.getVectorMinNumElements() < MinElts)

    unsigned Size = VT.getSizeInBits().getKnownMinValue();
      RC = &RISCV::VRRegClass;
      RC = &RISCV::VRM2RegClass;
      RC = &RISCV::VRM4RegClass;
      RC = &RISCV::VRM8RegClass;
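    // Illustrative note (added; not in the original source): Size above is the
    // type's known-minimum width in bits, so assuming RISCV::RVVBitsPerBlock
    // is 64, a type such as nxv2i32 (64 bits minimum) selects the LMUL=1 class
    // VR, nxv4i32 selects VRM2, nxv8i32 selects VRM4, and nxv16i32 selects
    // VRM8.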
    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
      addRegClassForRVV(VT);

    for (MVT VT : F16VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : BF16VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : F32VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : F64VecVTs)
      addRegClassForRVV(VT);

  auto addRegClassForFixedVectors = [this](MVT VT) {

    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
  // Compute derived properties from the register classes.

  // DAGCombiner can call isLoadExtLegal for types that aren't legal.

  // TODO: add all necessary setOperationAction calls.

  if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {

  if (!Subtarget.hasVendorXTHeadBb())

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
      !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))

  if (!Subtarget.hasStdExtZbb())

  if (!Subtarget.hasStdExtZmmul()) {
  } else if (Subtarget.is64Bit()) {

  if (!Subtarget.hasStdExtM()) {
  } else if (Subtarget.is64Bit()) {
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
  } else if (Subtarget.hasVendorXTHeadBb()) {
  } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
       Subtarget.hasVendorXTHeadBb())

  if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
  // Zbkb can use rev8+brev8 to implement bitreverse.

  if (Subtarget.hasStdExtZbb() ||
      (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {

  if (Subtarget.hasStdExtZbb() ||
      (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.

  if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
  } else if (Subtarget.hasShortForwardBranchOpt()) {
    // We can use PseudoCCSUB to implement ABS.
  } else if (Subtarget.is64Bit()) {

  else if (!Subtarget.hasVendorXTHeadCondMov())
  static const unsigned FPLegalNodeTypes[] = {

  static const unsigned FPOpToExpand[] = {

  static const unsigned FPRndMode[] = {

  static const unsigned ZfhminZfbfminPromoteOps[] = {

  if (Subtarget.hasStdExtZfbfmin()) {

  if (Subtarget.hasStdExtZfa())

                       Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);

  // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
  // complete support for all operations in LegalizeDAG.

  // We need to custom promote this.

  if (Subtarget.hasStdExtZfa()) {

  if (Subtarget.hasStdExtZfa()) {

  // f16/bf16 require custom handling.

  // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.

  if (Subtarget.hasStdExtZicbop()) {

  if (Subtarget.hasStdExtA()) {
    if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
  } else if (Subtarget.hasForcedAtomics()) {

  // Custom lowering of llvm.clear_cache.

  // RVV intrinsics may have illegal operands.
  // We also need to custom legalize vmv.x.s.
                     {MVT::i8, MVT::i16}, Custom);
  static const unsigned IntegerVPOps[] = {
      ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
      ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
      ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
      ISD::VP_XOR,         ISD::VP_SRA,         ISD::VP_SRL,
      ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
      ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
      ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
      ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
      ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
      ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
      ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
      ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
      ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
      ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
      ISD::EXPERIMENTAL_VP_SPLAT};

  static const unsigned FloatingPointVPOps[] = {
      ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
      ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
      ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
      ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
      ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
      ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
      ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
      ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
      ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
      ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
      ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
      ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
      ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};

  static const unsigned IntegerVecReduceOps[] = {

  static const unsigned FloatingPointVecReduceOps[] = {

  static const unsigned FloatingPointLibCallOps[] = {

  // We must custom-lower certain vXi64 operations on RV32 due to the vector
  // element type being illegal.
                        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,
                        ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                        ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
    for (MVT VT : BoolVecVTs) {

      // Mask VTs are custom-expanded into a series of standard nodes

                          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.

                          ISD::VP_TRUNCATE, ISD::VP_SETCC},

    for (MVT VT : IntVecVTs) {

      // Vectors implement MULHS/MULHU.

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())

      // Custom-lower extensions and truncations from/to mask types.

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.

      // Custom-lower insert/extract operations to simplify patterns.

      // Custom-lower reduction operations to set up the corresponding custom

                          {ISD::VP_LOAD, ISD::VP_STORE,
                           ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                           ISD::VP_SCATTER},

      if (Subtarget.hasStdExtZvkb()) {

      if (Subtarget.hasStdExtZvbb()) {
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
                            ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},

    for (MVT VT : VecTupleVTs) {

  // Expand various CCs to best match the RVV ISA, which natively supports UNE
  // but no other unordered comparisons, and supports all ordered comparisons
  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
  // and we pattern-match those back to the "original", swapping operands once
  // more. This way we catch both operations and both "vf" and "fv" forms with

  // TODO: support more ops.
  static const unsigned ZvfhminZvfbfminPromoteOps[] = {

  // TODO: support more vp ops.
  static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
      ISD::VP_FROUNDTOZERO,
      ISD::VP_REDUCE_FMINIMUM,
      ISD::VP_REDUCE_FMAXIMUM};
  // Sets common operation actions on RVV floating-point vector types.
  const auto SetCommonVFPActions = [&](MVT VT) {
    // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
    // sizes are within one power-of-two of each other. Therefore conversions
    // between vXf16 and vXf64 must be lowered as sequences which convert via

    // Custom-lower insert/extract operations to simplify patterns.

    // Expand various condition codes (explained above).

    // Expand FP operations that need libcalls.

                        {ISD::VP_LOAD, ISD::VP_STORE,
                         ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                         ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                         ISD::VP_SCATTER},

  // Sets common extload/truncstore actions on RVV floating-point vector
  const auto SetCommonVFPExtLoadTruncStoreActions =
        for (auto SmallVT : SmallerVTs) {

  // Sets common actions for f16 and bf16 for when there's only
  // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
  const auto SetCommonPromoteToF32Actions = [&](MVT VT) {

                        ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                        ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

    // Expand FP operations that need libcalls.

    // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.

    for (MVT VT : F16VecVTs) {
      SetCommonVFPActions(VT);
    for (MVT VT : F16VecVTs) {
      SetCommonPromoteToF32Actions(VT);
    for (MVT VT : BF16VecVTs) {
      SetCommonPromoteToF32Actions(VT);
    for (MVT VT : F32VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
    for (MVT VT : F64VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      if (!useRVVForFixedLengthVectorVT(VT))

      // By default everything must be expanded.

      // Custom lower fixed vector undefs to scalable vector undefs to avoid
      // expansion to a build_vector of 0s.

      // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.

                          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

      // Operations below are different between masks and other vectors.

                          ISD::VP_SETCC, ISD::VP_TRUNCATE},

      // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
      // it before type legalization for i64 vectors on RV32. It will then be
      // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
      // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
      // improvements first.

                          ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

      // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.

      // Custom-lower reduction operations to set up the corresponding custom

      if (Subtarget.hasStdExtZvkb())

      if (Subtarget.hasStdExtZvbb()) {

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the

      // There are no extending loads or truncating stores.

      if (!useRVVForFixedLengthVectorVT(VT))

      // By default everything must be expanded.

      // Custom lower fixed vector undefs to scalable vector undefs to avoid
      // expansion to a build_vector of 0s.

                          ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                          ISD::EXPERIMENTAL_VP_STRIDED_STORE},

      if (Subtarget.hasStdExtZfhmin()) {

        // We need to custom legalize f16 build vectors if Zfhmin isn't

        // Don't promote f16 vector operations to f32 if f32 vector type is
        // TODO: could split the f16 vector into two vectors and do promotion.

      if (Subtarget.hasStdExtZfbfmin()) {

        // We need to custom legalize bf16 build vectors if Zfbfmin isn't

        // Don't promote f16 vector operations to f32 if f32 vector type is
        // TODO: could split the f16 vector into two vectors and do promotion.

        // TODO: Promote VP ops to fp32.

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.

      if (Subtarget.hasStdExtZfbfmin())

  if (Subtarget.hasStdExtA())

  if (Subtarget.hasForcedAtomics()) {
    // Force __sync libcalls to be emitted for atomic rmw/cas operations.

  if (Subtarget.hasVendorXTHeadMemIdx()) {

  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {

  // Function alignments.

  // Set preferred alignments.

  if (Subtarget.hasStdExtZbb())

  if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||

  if (Subtarget.hasStdExtZbkb())

                       ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,

  if (Subtarget.hasVendorXTHeadMemPair())
  // Disable strict node mutation.

  // Let the subtarget decide if a predictable select is more expensive than the
  // corresponding branch. This information is used in CGP/SelectOpt to decide
  // when to convert selects into branches.

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      bool IsScalable) const {

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())

  // Don't allow VF=1 if those types aren't legal.

  // VLEN=32 support is incomplete.

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.

                                                 unsigned Intrinsic) const {
  auto &DL = I.getDataLayout();

  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided, bool UsePtrVal = false) {
    // We can't use ptrVal if the intrinsic can access memory before the
    // pointer. This means we can't use it for strided or indexed intrinsics.
      Info.ptrVal = I.getArgOperand(PtrOp);
      Info.fallbackAddressSpace =
          I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
      // Use return type. If it's segment load, return type is a struct.

      // RISC-V vector tuple type's alignment type should be its element type.
      if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
            1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
    Info.align = DL.getABITypeAlign(MemTy);

  if (I.hasMetadata(LLVMContext::MD_nontemporal))
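  // Clarifying note (added; not in the original source): in the dispatch
  // below, UsePtrVal is only passed as true where the pointer operand is the
  // lowest address accessed (e.g. riscv_seg*_load/store and the riscv_vlseg*
  // intrinsics); the strided (riscv_vlse/vlsseg*) and indexed
  // (riscv_vloxei/vluxei and their segment forms) variants leave it at the
  // default false, matching the comment inside SetRVVLoadStoreInfo above.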
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true,
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true,
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsUnitStrided*/ false);
  // No global is ever allowed as a base.

  // None of our addressing modes allows a scalable offset

  // RVV instructions only support register addressing.

  // Require a 12-bit signed offset.

  case 0: // "r+i" or just "i", depending on HasBaseReg.
    return false; // disallow "r+r" or "r+r+i".

  return isInt<12>(Imm);

  return isInt<12>(Imm);

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
  return (SrcBits == 64 && DestBits == 32);

// We consider i64->i32 free on RV64 since we have good selection of W
// instructions that make promoting operations back to i64 free in many cases.
  return (SrcBits == 64 && DestBits == 32);

  // free truncate from vnsrl and vnsra
  if (SrcBits == DestBits * 2) {
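    // Illustrative note (added; not in the original source): this branch
    // covers truncates where the source element is exactly twice as wide as
    // the destination, e.g. a vector i64 -> i32 truncate, which RVV can
    // perform with a single narrowing shift (vnsrl/vnsra) and is therefore
    // treated as free here.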
// Zexts are free if they can be combined with a load.
// Don't advertise i32->i64 zextload as being free for RV64. It interacts
// poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&

  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;

  return Subtarget.hasStdExtZbb() ||
         (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());

  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
         (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());

  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())

  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
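  // Worked example (added; not in the original source): a mask of 1 << 10
  // (0x400) still fits a signed 12-bit ANDI immediate, so the check above
  // returns false and the plain AND is kept; a power-of-two mask of 1 << 11
  // (0x800) no longer fits, so it returns true and a bit-extract is preferred.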
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());

  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant
  if (Subtarget.hasVendorXTHeadBs())
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  return C && C->getAPIntValue().ule(10);

  // Only enable for rvv.

  if (BitSize > Subtarget.getXLen())

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a

                                               unsigned OldShiftOpcode,
                                               unsigned NewShiftOpcode,
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?

  // If the vector op is not supported, try to convert to scalar.

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  // Permit a vector binary operation to be converted to a scalar binary
  // operation which is custom lowered with an illegal type.

// In order to maximise the opportunity for common subexpression elimination,
// keep a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.

// Returns 0-31 if the fli instruction is available for the type and this is
// legal FP immediate for the type. Returns -1 otherwise.
  if (!Subtarget.hasStdExtZfa())
  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
  else if (VT == MVT::f32)
  else if (VT == MVT::f64)
  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  // Cannot create a 64 bit floating-point immediate value for rv32.

  // td can handle +0.0 or -0.0 already.
  // -0.0 can be created by fmv + fneg.

  // Special case: fmv + fneg

  // Building an integer and then converting requires a fmv at the end of
  // the integer sequence. The fmv is not required for Zfinx.
  const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
                                     Subtarget.getXLen(), Subtarget);

// TODO: This is very conservative.
                                                  unsigned Index) const {
  // Only support extracting a fixed from a fixed vector for now.

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (EltVT == MVT::i1)

  // If we're extracting only data from the first VLEN bits of the source
  // then we can always do this with an m1 vslidedown.vx. Restricting the
  // Index ensures we can use a vslidedown.vi.
  // TODO: We can generalize this when the exact VLEN is known.
  if (Index + ResElts <= MinVLMAX && Index < 31)

  // Conservatively only handle extracting half of a vector.
  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
  // a cheap extract. However, this case is important in practice for
  // shuffled extracts of longer vectors. How should we resolve this?
  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
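// Worked example (added; not in the original source): assuming the earlier
// VLEN-based fast path does not apply, extracting a v4i32 subvector from a
// v8i32 source is reported as cheap only when it starts at element 0 (the low
// half) or element 4 (the high half); any other index is not considered cheap
// by this check.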
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.

                                           std::optional<MVT> RegisterVT) const {
  // Pair inline assembly operand
  if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
      *RegisterVT == MVT::Untyped)

// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.

                                        unsigned &NumIntermediates,
                                        MVT &RegisterVT) const {
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare

  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
      isa<ConstantSDNode>(LHS.getOperand(1))) {

      ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
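      // Worked example (added; not in the original source): on RV64, testing
      // bit 10 (Mask == 0x400) gives ShAmt = 64 - 1 - 10 = 53, so the test
      // becomes a left shift by 53 followed by a signed compare of the result
      // against zero, placing the tested bit in the sign position.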
  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();

    // Convert X > -1 to X >= 0.

    // Convert X < 1 to 0 >= X.

  if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv1i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv2i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv4i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv8i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv16i8x4)
  if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
    return RISCV::VRRegClassID;
    return RISCV::VRM2RegClassID;
    return RISCV::VRM4RegClassID;
    return RISCV::VRM8RegClassID;

    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;

    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;

    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;

  unsigned RegsPerField =

  switch (RegsPerField) {
      return RISCV::VRN2M1RegClassID;
      return RISCV::VRN3M1RegClassID;
      return RISCV::VRN4M1RegClassID;
      return RISCV::VRN5M1RegClassID;
      return RISCV::VRN6M1RegClassID;
      return RISCV::VRN7M1RegClassID;
      return RISCV::VRN8M1RegClassID;
      return RISCV::VRN2M2RegClassID;
      return RISCV::VRN3M2RegClassID;
      return RISCV::VRN4M2RegClassID;
      return RISCV::VRN2M4RegClassID;

  return RISCV::VRRegClassID;
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
                                                 MVT VecVT, MVT SubVecVT,
                                                 unsigned InsertExtractIdx,
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");

  // If VecVT is a vector tuple type, either it's the tuple type with same
  // RegClass with SubVecVT or SubVecVT is actually a subvector of the VecVT.
    if (VecRegClassID == SubRegClassID)
      return {RISCV::NoSubRegister, 0};

           "Only allow scalable vector subvector.");
           "Invalid vector tuple insert/extract for vector and subvector with "

  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,

  return {SubRegIdx, InsertExtractIdx};
// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {

unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {

         "Unexpected opcode");

  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  return Op.getOperand(II->VLOperand + 1 + HasChain);

// We only support a set of vector types with a consistent maximum fixed size
// across all supported vector element types to avoid legalization issues.
// Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
// fixed-length vector type we support is 1024 bytes.

  // Don't use RVV for vectors we cannot scalarize if required.

  // i1 is supported but has different rules.

  // Masks can only use a single register.

  // Reject elements larger than ELEN.

  // Don't use RVV for types that don't fit.

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
// Return the largest legal scalable vector type that matches VT's element type.

  // This may be called before legal types are setup.
         "Expected legal fixed length vector!");

  unsigned MaxELen = Subtarget.getELen();

  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.

  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());

// Grow V to consume an entire RVV register.
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");

// Shrink V so it's just big enough to maintain a VT's worth of data.
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");

/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable) length.

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static std::pair<SDValue, SDValue>

static std::pair<SDValue, SDValue>

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that the fixed-length vector is contained in. Otherwise if
// VecVT is scalable, then ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>

std::pair<unsigned, unsigned>

  return std::make_pair(MinVLMAX, MaxVLMAX);
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
                                            EVT VT, unsigned DefinedValues) const {

  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
  // implementation-defined.
  std::tie(LMul, Fractional) =
    Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
    Cost = (LMul * DLenFactor);
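  // Illustrative note (added; not in the original source): reading the two
  // formulas above, with a DLenFactor of 2 a fractional LMUL of 1/2 costs
  // 2 / 2 = 1 while an integral LMUL of 4 costs 4 * 2 = 8; with a DLenFactor
  // of 1 an LMUL=1 operation costs 1, matching the TODO's assumption above.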
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vregs implied by LMUL. Note that
/// operand (index and possibly mask) are handled separately.

/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.

/// Return the cost of a vslidedown.vx or vslideup.vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.

/// Return the cost of a vslidedown.vi or vslideup.vi instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.

  // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
  // bf16 conversions are always promoted to f32.
      Op.getValueType() == MVT::bf16) {
    bool IsStrict = Op->isStrictFPOpcode();
                        {Op.getOperand(0), Op.getOperand(1)});
                        {Op.getValueType(), MVT::Other},

  // Other operations are legal.

// RISC-V FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan. We can use a conversion instruction and fix the
// nan case with a compare and a select.
  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
      Src.getValueType() == MVT::bf16) {

  else if (DstVT == MVT::i64 && SatVT == MVT::i32)

  // FIXME: Support other SatVTs by clamping before or after the conversion.

  MVT SrcVT = Src.getSimpleValueType();

  // Only handle saturating to the destination type.
  if (SatVT != DstEltVT)

  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;

         "Expected same element count");

                     {Src, Src, DAG.getCondCode(ISD::SETNE),
                      DAG.getUNDEF(Mask.getValueType()), Mask, VL});
  // Need to widen by more than 1 step, promote the FP type, then do a widening
  if (DstEltSize > (2 * SrcEltSize)) {

  MVT CvtContainerVT = DstContainerVT;
  MVT CvtEltVT = DstEltVT;
  if (SrcEltSize > (2 * DstEltSize)) {

  while (CvtContainerVT != DstContainerVT) {
    // Rounding mode here is arbitrary since we aren't shifting out any bits.
    Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);

                    Res, DAG.getUNDEF(DstContainerVT), VL);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
  // bf16 conversions are always promoted to f32.
                          {Op.getOperand(0), SrcVal});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});

  // Other operations are legal.

  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back. Taking care to avoid converting values that are
// nan or already correct.
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);

  // Freeze the source since we are increasing the number of uses.

  // We do the conversion on the absolute value and fix the sign at the end.

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
                               DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.

  // Truncate to integer and convert back to FP.

  switch (Op.getOpcode()) {
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
  case ISD::VP_FNEARBYINT:

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.

  // Restore the original sign so that -0.0 is preserved.
                    Src, Src, Mask, VL);
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
// qNan and converting the new source to integer and back to FP.
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

  // Freeze the source since we are increasing the number of uses.

  // Convert sNan to qNan by executing x + x for all unordered elements x in Src.
  MVT MaskVT = Mask.getSimpleValueType();
                        {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
                         DAG.getUNDEF(MaskVT), Mask, VL});
                    {Chain, Src, Src, Src, Unorder, VL});
  Chain = Src.getValue(1);

  // We do the conversion on the absolute value and fix the sign at the end.

  // Determine the largest integer that can be represented exactly. This and
                               DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.

  // Truncate to integer and convert back to FP.
  switch (Op.getOpcode()) {
        {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
                            DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
                            DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,

    // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
                            DAG.getVTList(ContainerVT, MVT::Other), Chain,
                            Truncated, Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
                    Src, Src, Mask, VL);
  MVT VT = Op.getSimpleValueType();

  // Create an integer the size of the mantissa with the MSB set. This and all
  // values larger than it don't have any fractional bits so don't need to be

// Expand vector LRINT and LLRINT by converting to the integer domain.
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

         "Unexpected vector MVT");

  // We will use a SINT_TO_FP to materialize this constant so we should use a
  // signed APSInt here.
  // We use an arbitrary rounding mode here. If a floating-point is an exact
  // integer (e.g., 1.0), the rounding mode does not affect the output value. If
  // the rounding mode changes the output value, then it is not an exact
  // integer.

  // If it is out of signed integer range, it will return an invalid operation.
  // If it is not an exact integer, IsExact is false.

// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can then be lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
//
// EltSizeInBits is the size of the type that the sequence will be calculated
// in, i.e. SEW for build_vectors or XLEN for address calculations.
                                                      unsigned EltSizeInBits) {
  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<APInt> SeqStepNum;
  std::optional<APInt> SeqAddend;
  std::optional<std::pair<APInt, unsigned>> PrevElt;
  assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());

  // First extract the ops into a list of constant integer values. This may not
  // be possible for floats if they're not all representable as integers.
  const unsigned OpSize = Op.getScalarValueSizeInBits();
      Elts[Idx] = std::nullopt;
      Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
      Elts[Idx] = *ExactInteger;

    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.

      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      APInt ValDiff = *Elt - PrevElt->first;

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.

      int64_t Remainder = ValDiff.srem(IdxDiff);
      // Normalize the step if it's greater than 1.
        // The difference must cleanly divide the element span.
        ValDiff = ValDiff.sdiv(IdxDiff);

        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)

        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != *Elt)
      PrevElt = std::make_pair(*Elt, Idx);
  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)

  // Loop back through the sequence and validate elements we might have skipped
  // while waiting for a valid step. While doing this, log any sequence addend.
        (APInt(EltSizeInBits, Idx,
               /*isSigned=*/false, /*implicitTrunc=*/true) *
            .sdiv(*SeqStepDenom);
    APInt Addend = *Elt - ExpectedVal;
    else if (Addend != SeqAddend)

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
                     SeqAddend->getSExtValue()};
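// Worked example (added; not in the original source): for the build_vector
// <i32 1, i32 3, i32 5, i32 7>, the matcher above yields StepNumerator = 2,
// StepDenominator = 1 and Addend = 1, which a caller can lower as the RVV
// sequence (VID * 2) + 1, as described in the function comment.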
// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
// and lower it as a VRGATHER_VX_VL from the source vector.

  // Don't perform this optimization for i1 vectors, or if the element types are
  // FIXME: Support i1 vectors, maybe by promoting to i8?
  MVT SrcVT = Src.getSimpleValueType();

  // The index must be a legal type.

  // Check that we know Idx lies within VT
  auto *CIdx = dyn_cast<ConstantSDNode>(Idx);

  // Convert fixed length vectors to scalable
  MVT ContainerVT = VT;
  MVT SrcContainerVT = SrcVT;

  // Put Vec in a VT sized vector

  // We checked that Idx fits inside VT earlier

/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
/// Note that this includes vectors of 2 elements by association. The
/// upper-most element is the "dominant" one, allowing us to use a splat to
/// "insert" the upper element, and an insert of the lower element at position
/// 0, which improves codegen.
  MVT VT = Op.getSimpleValueType();

  unsigned MostCommonCount = 0;

  unsigned NumUndefElts =

  // Track the number of scalar loads we know we'd be inserting, estimated as
  // any non-zero floating-point constant. Other kinds of element are either
  // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materialization and
  // vector-insertion instructions is not known.
  unsigned NumScalarLoads = 0;

    unsigned &Count = ValueCounts[V];
    if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
      NumScalarLoads += !CFP->isExactlyValue(+0.0);

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    if (++Count >= MostCommonCount) {
      MostCommonCount = Count;

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
      ((MostCommonCount > DominantValueCountThreshold) ||

    // Start by splatting the most common element.

    // We can handle an insert into the last element (of a splat) via
    // v(f)slide1down. This is slightly better than the vslideup insert
    // lowering as it avoids the need for a vector group temporary. It
    // is also better than using vmerge.vx as it avoids the need to
    // materialize the mask in a vector register.
        !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
        LastOp != DominantValue) {

      Processed.insert(LastOp);

      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
      if (ValueCounts[V] == 1) {

        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
              return DAG.getConstant(V == V1, DL, XLenVT);
  MVT VT = Op.getSimpleValueType();

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
    // If we have to use more than one INSERT_VECTOR_ELT then this
    // optimization is likely to increase code size; avoid performing it in
    // such a case. We can use a load from a constant pool in this case.

      // Now we can create our integer vector type. Note that it may be larger
      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
      unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
      MVT IntegerViaVecVT =

      unsigned BitPos = 0, IntegerEltIdx = 0;

      for (unsigned I = 0; I < NumElts;) {
        bool BitValue = !V.isUndef() && V->getAsZExtVal();
        Bits |= ((uint64_t)BitValue << BitPos);

        // Once we accumulate enough bits to fill our scalar type or process the
        // last element, insert into our vector and clear our accumulated data.
        if (I % NumViaIntegerBits == 0 || I == NumElts) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64<32>(Bits);
          Elts[IntegerEltIdx] = Elt;

      if (NumElts < NumViaIntegerBits) {
        // If we're producing a smaller vector than our minimum legal integer
        // type, bitcast to the equivalent (known-legal) mask type, and extract
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");

      // Else we must have produced an integer type with the same size as the
      // mask type; bitcast for the final result.

  // Try and match index sequences, which we can lower to the vid instruction
  // with optional modifications. An all-undef vector is matched by
  // getSplatValue, above.
    int64_t StepNumerator = SimpleVID->StepNumerator;
3792unsigned StepDenominator = SimpleVID->StepDenominator;
3793 int64_t Addend = SimpleVID->Addend;
3795assert(StepNumerator != 0 &&
"Invalid step");
3797 int64_t SplatStepVal = StepNumerator;
3799// Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it 3800// anyway as the shift of 63 won't fit in uimm5. 3801if (StepNumerator != 1 && StepNumerator !=
INT64_MIN &&
3803 Negate = StepNumerator < 0;
3805 SplatStepVal =
Log2_64(std::abs(StepNumerator));
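// Illustrative example (hypothetical constants): the index sequence
// <1, 3, 5, 7> is matched with StepNumerator = 2, StepDenominator = 1 and
// Addend = 1, i.e. element i is (i * 2) / 1 + 1; because 2 is a power of
// two, the multiply is expressed as a shift with SplatStepVal = 1.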
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
// There is no vmul.vi instruction so ensure the multiply constant can fit in
// a single addi instruction.
if (((StepOpcode ==
ISD::MUL && isInt<12>(SplatStepVal)) ||
3813 (StepOpcode ==
ISD::SHL && isUInt<5>(SplatStepVal))) &&
3815 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3821// Convert right out of the scalable type so we can use standard ISD 3822// nodes for the rest of the computation. If we used scalable types with 3823// these, we'd lose the fixed-length vector info and generate worse 3826if ((StepOpcode ==
ISD::MUL && SplatStepVal != 1) ||
3827 (StepOpcode ==
ISD::SHL && SplatStepVal != 0)) {
3829 VID = DAG.
getNode(StepOpcode,
DL, VIDVT, VID, SplatStep);
3831if (StepDenominator != 1) {
3836if (Addend != 0 || Negate) {
3842// TODO: Use vfwcvt to reduce register pressure. 3849// For very small build_vectors, use a single scalar insert of a constant. 3850// TODO: Base this on constant rematerialization cost, not size. 3855assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3856"Unexpected sequence type");
3857// If we can use the original VL with the modified element type, this 3858// means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3859// be moved into InsertVSETVLI? 3864uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3866// Construct the amalgamated value at this larger vector type. 3867for (
constauto &OpIdx :
enumerate(
Op->op_values())) {
3868constauto &SeqV = OpIdx.value();
3871 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
// On RV32, we need to sign-extend to use getSignedConstant.
if (ViaIntVT == MVT::i32)
3878 SplatValue = SignExtend64<32>(SplatValue);
3892// Attempt to detect "hidden" splats, which only reveal themselves as splats 3893// when re-interpreted as a vector with a larger element type. For example, 3894// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 3895// could be instead splat as 3896// v2i32 = build_vector i32 0x00010000, i32 0x00010000 3897// TODO: This optimization could also work on non-constant splats, but it 3898// would require bit-manipulation instructions to construct the splat value. 3900constauto *BV = cast<BuildVectorSDNode>(
Op);
3903 BV->getRepeatedSequence(Sequence) &&
3904 (Sequence.size() * EltBitSize) <= Subtarget.
getELen()) {
3905unsigned SeqLen = Sequence.size();
3907assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3908 ViaIntVT == MVT::i64) &&
3909"Unexpected sequence type");
3911// If we can use the original VL with the modified element type, this 3912// means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3913// be moved into InsertVSETVLI? 3914constunsigned RequiredVL = NumElts / SeqLen;
3915constunsigned ViaVecLen =
3917 NumElts : RequiredVL;
3921uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3923// Construct the amalgamated value which can be splatted as this larger 3925for (
constauto &SeqV : Sequence) {
3928 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
// On RV32, we need to sign-extend to use getSignedConstant.
if (ViaIntVT == MVT::i32)
3936 SplatValue = SignExtend64<32>(SplatValue);
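// Illustrative sketch (scalar form of the amalgamation above, assumed
// values): the repeated v4i16 sequence {0, 1} becomes the i32 splat value
// 0x00010000 from the earlier example:
//   uint16_t Seq[2] = {0, 1};
//   uint64_t Splat = 0;
//   for (unsigned EltIdx = 0; EltIdx < 2; ++EltIdx)
//     Splat |= (uint64_t)Seq[EltIdx] << (EltIdx * 16); // 0x00010000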
3938// Since we can't introduce illegal i64 types at this stage, we can only 3939// perform an i64 splat on RV32 if it is its own sign-extended value. That 3940// way we can use RVV instructions to splat. 3942 (!Subtarget.
is64Bit() && ViaIntVT == MVT::i64)) &&
3943"Unexpected bitcast sequence");
3944if (ViaIntVT.
bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3954if (ViaVecLen != RequiredVL)
3962// If the number of signbits allows, see if we can lower as a <N x i8>. 3963// Our main goal here is to reduce LMUL (and thus work) required to 3964// build the constant, but we will also narrow if the resulting 3965// narrow vector is known to materialize cheaply. 3966// TODO: We really should be costing the smaller vector. There are 3967// profitable cases this misses. 3974 Source, DAG, Subtarget);
3982// For constant vectors, use generic constant pool lowering. Otherwise, 3983// we'd have to materialize constants in GPRs just to move them into the 3996return Subtarget.
is64Bit() ? RISCV::PACKW : RISCV::PACK;
4003/// Double the element size of the build vector to reduce the number 4004/// of vslide1down in the build vector chain. In the worst case, this 4005/// trades three scalar operations for 1 vector operation. Scalar 4006/// operations are generally lower latency, and for out-of-order cores 4007/// we also benefit from additional parallelism. 4011MVT VT =
Op.getSimpleValueType();
4017// TODO: Relax these architectural restrictions, possibly with costing 4018// of the actual instructions required. 4019if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4024if (ElemSizeInBits >= std::min(Subtarget.
getELen(), Subtarget.
getXLen()) ||
4028// Produce [B,A] packed into a type twice as wide. Note that all 4029// scalars are XLenVT, possibly masked (see below). 4034// Bias the scheduling of the inserted operations to near the 4035// definition of the element - this tends to reduce register 4038if (Subtarget.hasStdExtZbkb())
4039// Note that we're relying on the high bits of the result being 4040// don't care. For PACKW, the result is *sign* extended. 4043 ElemDL, XLenVT,
A,
B),
4055 NewOperands.
reserve(NumElts / 2);
4057 NewOperands.
push_back(pack(
Op.getOperand(i),
Op.getOperand(i + 1)));
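// Illustrative sketch (plain integer arithmetic, not the DAG form): packing
// two adjacent 16-bit elements A and B into one 32-bit element, with A in
// the low half, mirrors what the pack helper above produces:
//   uint32_t Packed = (uint32_t)(uint16_t)A | ((uint32_t)(uint16_t)B << 16);
// With Zbkb this is a single pack/packw; otherwise it is built from
// shift/mask/or style operations.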
4067MVT VT =
Op.getSimpleValueType();
4075// Proper support for f16 requires Zvfh. bf16 always requires special 4076// handling. We need to cast the scalar to integer and create an integer 4078if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4083if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4084 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4085// Called by LegalizeDAG, we need to use XLenVT operations since we 4086// can't create illegal types. 4087if (
auto *
C = dyn_cast<ConstantFPSDNode>(Elem)) {
4088// Manually constant fold so the integer build_vector can be lowered 4089// better. Waiting for DAGCombine will be too late. 4097// Called by scalar type legalizer, we can use i16. 4114// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask 4115// vector type, we have a legal equivalently-sized i8 type, so we can use 4122// For a splat, perform a scalar truncate before creating the wider 4152// If we're compiling for an exact VLEN value, we can split our work per 4153// register in the register group. 4163// The following semantically builds up a fixed length concat_vector 4164// of the component build_vectors. We eagerly lower to scalable and 4165// insert_subvector here to avoid DAG combining it back to a large 4171auto OneVRegOfOps =
ArrayRef(BuildVectorOps).
slice(i, ElemsPerVReg);
4175unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4182// If we're about to resort to vslide1down (or stack usage), pack our 4183// elements into the widest scalar type we can. This will force a VL/VTYPE 4184// toggle, but reduces the critical path, the number of vslide1down ops 4185// required, and possibly enables scalar folds of the values. 4189// For m1 vectors, if we have non-undef values in both halves of our vector, 4190// split the vector into low and high halves, build them separately, then 4191// use a vselect to combine them. For long vectors, this cuts the critical 4192// path of the vslide1down sequence in half, and gives us an opportunity 4193// to special case each half independently. Note that we don't change the 4194// length of the sub-vectors here, so if both fallback to the generic 4195// vslide1down path, we should be able to fold the vselect into the final 4196// vslidedown (for the undef tail) for the first half w/ masking. 4198unsigned NumUndefElts =
4200unsigned NumDefElts = NumElts - NumUndefElts;
4201if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4208for (
unsigned i = 0; i < NumElts; i++) {
4210if (i < NumElts / 2) {
4217bool SelectMaskVal = (i < NumElts / 2);
4220assert(SubVecAOps.
size() == NumElts && SubVecBOps.
size() == NumElts &&
4221 MaskVals.
size() == NumElts);
// Cap the cost at a value linear in the number of elements in the vector.
// The default lowering is to use the stack. The vector store + scalar loads
// is linear in VL. However, at high LMULs vslide1down and vslidedown end up
// being (at least) linear in LMUL. As a result, using the vslidedown
// lowering for every element ends up being VL*LMUL.
// TODO: Should we be directly costing the stack alternative? Doing so might
// give us a more accurate upper bound.
// TODO: unify with TTI getSlideCost.
// TODO: Should we be using the build instseq then cost + evaluate scheme
// we use for integer constants here?
unsigned UndefCount = 0;
4263 LinearBudget -= PerSlideCost;
4266 LinearBudget -= PerSlideCost;
4269 LinearBudget -= PerSlideCost;
4272if (LinearBudget < 0)
4277"Illegal type which will result in reserved encoding");
4289// Start our sequence with a TA splat in the hopes that hardware is able to 4290// recognize there's no dependency on the prior value of our temporary 4302 Vec,
Offset, Mask, VL, Policy);
4315 Vec,
Offset, Mask, VL, Policy);
4325if (isa<ConstantSDNode>(
Lo) && isa<ConstantSDNode>(
Hi)) {
4326 int32_t LoC = cast<ConstantSDNode>(
Lo)->getSExtValue();
4327 int32_t HiC = cast<ConstantSDNode>(
Hi)->getSExtValue();
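// Illustrative example (hypothetical constant): splatting the i64 value -5
// (0xFFFFFFFFFFFFFFFB) on RV32 gives
//   int32_t LoC = 0xFFFFFFFB; // -5
//   int32_t HiC = 0xFFFFFFFF; // -1
// and (LoC >> 31) == -1 == HiC, so Hi is just the sign extension of Lo and
// the splat can use the vector/scalar form checked below.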
4328// If Hi constant is all the same sign bit as Lo, lower this as a custom 4329// node in order to try and match RVV vector/scalar instructions. 4330if ((LoC >> 31) == HiC)
4333// If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, 4334// we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use 4335// vlmax vsetvli or vsetivli to change the VL. 4336// FIXME: Support larger constants? 4337// FIXME: Support non-constant VLs by saturating? 4341 (isa<RegisterSDNode>(VL) &&
4342 cast<RegisterSDNode>(VL)->
getReg() == RISCV::X0))
4344elseif (isa<ConstantSDNode>(VL) && isUInt<4>(VL->
getAsZExtVal()))
4357// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 4359 isa<ConstantSDNode>(
Hi.getOperand(1)) &&
4360Hi.getConstantOperandVal(1) == 31)
4363// If the hi bits of the splat are undefined, then it's fine to just splat Lo 4364// even if it might be sign extended. 4368// Fall back to a stack store and stride x0 vector load. 4373// Called by type legalization to handle splat of i64 on RV32. 4374// FIXME: We can optimize this when the type has sign or zero bits in one 4379assert(Scalar.getValueType() == MVT::i64 &&
"Unexpected VT!");
4385// This function lowers a splat of a scalar operand Splat with the vector 4386// length VL. It ensures the final sequence is type legal, which is useful when 4387// lowering a splat after type legalization. 4391bool HasPassthru = Passthru && !Passthru.
isUndef();
4392if (!HasPassthru && !Passthru)
4399if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4400 EltVT == MVT::bf16) {
4401if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4402 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4415// Simplest case is that the operand needs to be promoted to XLenVT. 4416if (Scalar.getValueType().bitsLE(XLenVT)) {
// If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
// FIXME: Should we ignore the upper bits in isel instead?
 Scalar = DAG.
getNode(ExtOpc,
DL, XLenVT, Scalar);
4427assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4428"Unexpected scalar for splat lowering!");
4434// Otherwise use the more complicated splatting algorithm. 4438// This function lowers an insert of a scalar operand Scalar into lane 4439// 0 of the vector regardless of the value of VL. The contents of the 4440// remaining lanes of the result vector are unspecified. VL is assumed 4452SDValue ExtractedVal = Scalar.getOperand(0);
4453// The element types must be the same. 4457MVT ExtractedContainerVT = ExtractedVT;
4460 DAG, ExtractedContainerVT, Subtarget);
4462 ExtractedVal, DAG, Subtarget);
4464if (ExtractedContainerVT.
bitsLE(VT))
4477// Avoid the tricky legalization cases by falling back to using the 4478// splat code which already handles it gracefully. 4479if (!Scalar.getValueType().bitsLE(XLenVT))
4482 VT,
DL, DAG, Subtarget);
// If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
// FIXME: Should we ignore the upper bits in isel instead?
 Scalar = DAG.
getNode(ExtOpc,
DL, XLenVT, Scalar);
// Can this shuffle be performed on exactly one (possibly larger) input?
// Both inputs must be extracts.
// Extracting from the same source.
if (Src != V2.getOperand(0))
4513// Src needs to have twice the number of elements. 4515if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4518// The extracts must extract the two halves of the source. 4520 V2.getConstantOperandVal(1) != NumElts)
4526/// Is this shuffle interleaving contiguous elements from one vector into the 4527/// even elements and contiguous elements from another vector into the odd 4528/// elements. \p EvenSrc will contain the element that should be in the first 4529/// even element. \p OddSrc will contain the element that should be in the first 4530/// odd element. These can be the first element in a source or the element half 4531/// way through the source. 4534// We need to be able to widen elements to the next larger integer type. 4538intSize = Mask.size();
4540assert(
Size == (
int)NumElts &&
"Unexpected mask size");
4546 EvenSrc = StartIndexes[0];
4547 OddSrc = StartIndexes[1];
4549// One source should be low half of first vector. 4550if (EvenSrc != 0 && OddSrc != 0)
// Subvectors will be extracted from either the start of the two input
// vectors, or from the start and middle of the first vector if it's a unary
// interleave.
// In both cases, HalfNumElts will be extracted.
// We need to ensure that the extract indices are 0 or HalfNumElts otherwise
// we'll create an illegal extract_subvector.
// FIXME: We could support other values using a slidedown first.
int HalfNumElts = NumElts / 2;
4561return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
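// Illustrative example (hypothetical shuffle): with two v8i8 sources, the
// mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves V1[0..3] into the even lanes
// and V2[0..3] into the odd lanes, giving EvenSrc == 0 and OddSrc == 8
// (NumElts); both are multiples of HalfNumElts == 4, so the match succeeds.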
4564/// Match shuffles that concatenate two vectors, rotate the concatenation, 4565/// and then extract the original number of elements from the rotated result. 4566/// This is equivalent to vector.splice or X86's PALIGNR instruction. The 4567/// returned rotation amount is for a rotate right, where elements move from 4568/// higher elements to lower elements. \p LoSrc indicates the first source 4569/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector 4570/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be 4571/// 0 or 1 if a rotation is found. 4573/// NOTE: We talk about rotate to the right which matches how bit shift and 4574/// rotate instructions are described where LSBs are on the right, but LLVM IR 4575/// and the table below write vectors with the lowest elements on the left. 4577intSize = Mask.size();
4579// We need to detect various ways of spelling a rotation: 4580// [11, 12, 13, 14, 15, 0, 1, 2] 4581// [-1, 12, 13, 14, -1, -1, 1, -1] 4582// [-1, -1, -1, -1, -1, -1, 1, 2] 4583// [ 3, 4, 5, 6, 7, 8, 9, 10] 4584// [-1, 4, 5, 6, -1, -1, 9, -1] 4585// [-1, 4, 5, 6, -1, -1, -1, -1] 4589for (
int i = 0; i !=
Size; ++i) {
4594// Determine where a rotate vector would have started. 4595int StartIdx = i - (M %
Size);
4596// The identity rotation isn't interesting, stop. 4600// If we found the tail of a vector the rotation must be the missing 4601// front. If we found the head of a vector, it must be how much of the 4603int CandidateRotation = StartIdx < 0 ? -StartIdx :
Size - StartIdx;
4606 Rotation = CandidateRotation;
4607elseif (Rotation != CandidateRotation)
4608// The rotations don't match, so we can't match this mask. 4611// Compute which value this mask is pointing at. 4612int MaskSrc = M <
Size ? 0 : 1;
// Compute which of the two target values this index should be assigned to.
// This reflects whether the high elements are remaining or the low elements
int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4619// Either set up this value if we've not encountered it before, or check 4620// that it remains consistent. 4622 TargetSrc = MaskSrc;
4623elseif (TargetSrc != MaskSrc)
4624// This may be a rotation, but it pulls from the inputs in some 4625// unsupported interleaving. 4629// Check that we successfully analyzed the mask, and normalize the results. 4630assert(Rotation != 0 &&
"Failed to locate a viable rotation!");
4631assert((LoSrc >= 0 || HiSrc >= 0) &&
4632"Failed to find a rotated input vector!");
4637// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be 4638// 2, 4, 8 and the integer type Factor-times larger than VT's 4639// element type must be a legal element type. 4640// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0) 4641// -> [p, q, r, s] (Factor=2, Index=1) 4646ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4653unsigned Shift = Index * EltBits;
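// Illustrative sketch (plain scalar arithmetic): for Factor == 2 on 8-bit
// elements, viewing an adjacent pair as a single 16-bit lane, each result of
// the deinterleave is just a shift by Index * EltBits plus a truncate:
//   uint8_t a = 0x12, p = 0x34;
//   uint16_t Pair = (uint16_t)a | ((uint16_t)p << 8);
//   uint8_t Even = (uint8_t)(Pair >> 0); // Index == 0 -> a
//   uint8_t Odd  = (uint8_t)(Pair >> 8); // Index == 1 -> p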
4663// Lower the following shuffle to vslidedown. 4665// t49: v8i8 = extract_subvector t13, Constant:i64<0> 4666// t109: v8i8 = extract_subvector t13, Constant:i64<8> 4667// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 4669// t69: v16i16 = extract_subvector t68, Constant:i64<0> 4670// t23: v8i16 = extract_subvector t69, Constant:i64<0> 4671// t29: v4i16 = extract_subvector t23, Constant:i64<4> 4672// t26: v8i16 = extract_subvector t69, Constant:i64<8> 4673// t30: v4i16 = extract_subvector t26, Constant:i64<0> 4674// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 4680auto findNonEXTRACT_SUBVECTORParent =
4681 [](
SDValue Parent) -> std::pair<SDValue, uint64_t> {
4684// EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 4685// a scalable vector. But we don't want to match the case. 4686 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4687Offset += Parent.getConstantOperandVal(1);
4688 Parent = Parent.getOperand(0);
4690return std::make_pair(Parent,
Offset);
4693auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4694auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4696// Extracting from the same source. 4701// Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. 4703for (
size_t i = 0; i != NewMask.
size(); ++i) {
4704if (NewMask[i] == -1)
4707if (
static_cast<size_t>(NewMask[i]) < NewMask.
size()) {
4708 NewMask[i] = NewMask[i] + V1IndexOffset;
4710// Minus NewMask.size() is needed. Otherwise, the b case would be 4711// <5,6,7,12> instead of <5,6,7,8>. 4712 NewMask[i] = NewMask[i] - NewMask.
size() + V2IndexOffset;
4716// First index must be known and non-zero. It will be used as the slidedown 4721// NewMask is also continuous. 4722for (
unsigned i = 1; i != NewMask.
size(); ++i)
4723if (NewMask[i - 1] + 1 != NewMask[i])
4727MVT SrcVT = Src.getSimpleValueType();
// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
int NumSubElts, Index;
4763bool OpsSwapped = Mask[Index] < (int)NumElts;
4764SDValue InPlace = OpsSwapped ? V2 : V1;
4765SDValue ToInsert = OpsSwapped ? V1 : V2;
// We slide up by the index that the subvector is being inserted at, and set
// VL to the index + the number of elements being inserted.
// If we're adding a suffix to the in-place vector, i.e. inserting right
// up to the very end of it, then we don't actually care about the tail.
if (NumSubElts + Index >= (
int)NumElts)
4783// If we're inserting into the lowest elements, use a tail undisturbed 4789 Res =
getVSlideup(DAG, Subtarget,
DL, ContainerVT, InPlace, ToInsert,
4794/// Match v(f)slide1up/down idioms. These operations involve sliding 4795/// N-1 elements to make room for an inserted scalar at one end. 4801bool OpsSwapped =
false;
4802if (!isa<BuildVectorSDNode>(V1)) {
4803if (!isa<BuildVectorSDNode>(V2))
4808SDValueSplat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4812// Return true if the mask could describe a slide of Mask.size() - 1 4813// elements from concat_vector(V1, V2)[Base:] to [Offset:]. 4816constunsigned E = Mask.size() - ((
Offset > 0) ?
Offset : 0);
4817for (
unsigned i = S; i != E; ++i)
4818if (Mask[i] >= 0 && (
unsigned)Mask[i] !=
Base + i +
Offset)
4824bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4825if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4828constint InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4829// Inserted lane must come from splat, undef scalar is legal but not profitable. 4830if (InsertIdx < 0 || InsertIdx / NumElts != (
unsigned)OpsSwapped)
4836// zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h + 4837// vslide1{down,up}.vx instead. 4853auto OpCode = IsVSlidedown ?
4858auto Vec = DAG.
getNode(OpCode,
DL, ContainerVT,
4861Splat, TrueMask, VL);
4865// Match a mask which "spreads" the leading elements of a vector evenly 4866// across the result. Factor is the spread amount, and Index is the 4867// offset applied. (on success, Index < Factor) This is the inverse 4868// of a deinterleave with the same Factor and Index. This is analogous 4869// to an interleave, except that all but one lane is undef. 4872for (
unsigned i = 0; i < Mask.size(); i++)
4873 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4876for (
unsigned i = 0; i < Factor; i++) {
4887for (
unsigned i = 0; i < Mask.size() / Factor; i++) {
4888unsigned j = i * Factor + Index;
4889if (Mask[j] != -1 && (
unsigned)Mask[j] != i)
4895// Given a vector a, b, c, d return a vector Factor times longer 4896// with Factor-1 undef's between elements. Ex: 4897// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0) 4898// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1) 4902MVT VT = V.getSimpleValueType();
4910// TODO: On rv32, the constant becomes a splat_vector_parts which does not 4911// allow the SHL to fold away if Index is 0. 4915// Make sure to use original element type 4917 EC.multiplyCoefficientBy(Factor));
4921// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx 4922// to create an interleaved vector of <[vscale x] n*2 x ty>. 4923// This requires that the size of ty is less than the subtarget's maximum ELEN. 4928// FIXME: Not only does this optimize the code, it fixes some correctness 4929// issues because MIR does not have freeze. 4936MVT VecContainerVT = VecVT;
// <vscale x n x ty> 4937// Convert fixed vectors to scalable if needed 4946// We're working with a vector of the same size as the resulting 4947// interleaved vector, but with half the number of elements and 4948// twice the SEW (Hence the restriction on not using the maximum 4953MVT WideContainerVT = WideVT;
// <vscale x n x ty*2> 4957// Bitcast the input vectors to integers in case they are FP 4959 EvenV = DAG.
getBitcast(VecContainerVT, EvenV);
4966if (Subtarget.hasStdExtZvbb()) {
4967// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. 4971 OffsetVec, Passthru, Mask, VL);
4973 Interleaved, EvenV, Passthru, Mask, VL);
4975// FIXME: We should freeze the odd vector here. We already handled the case 4976// of provably undef/poison above. 4978// Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with 4981 OddV, Passthru, Mask, VL);
// Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, i.e. the
// all-ones value 0xff...ff of the element type.
 OddV, AllOnesVec, Passthru, Mask, VL);
// Add the two together so we get
//   (OddV * 0xff...ff) + (OddV + EvenV)
// = (OddV * 0x100...00) + EvenV
// = (OddV << VecVT.getScalarSizeInBits()) + EvenV
// Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx.
 Interleaved, OddsMul, Passthru, Mask, VL);
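// Illustrative check of the identity above with plain integers (SEW == 8,
// hypothetical values):
//   uint8_t Even = 0x12, Odd = 0x34;
//   uint16_t ViaMacc  = (uint16_t)Odd * 0xFF + ((uint16_t)Odd + Even);
//   uint16_t Expected = ((uint16_t)Odd << 8) + Even; // 0x3412
//   assert(ViaMacc == Expected);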
4998// Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty> 5002 Interleaved = DAG.
getBitcast(ResultContainerVT, Interleaved);
5004// Convert back to a fixed vector if needed 5015// If we have a vector of bits that we want to reverse, we can use a vbrev on a 5016// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. 5038// If we don't have zvbb or the larger element type > ELEN, the operation will 5045// If the bit vector doesn't fit exactly into the larger element type, we need 5046// to insert it into the larger vector and then shift up the reversed bits 5047// afterwards to get rid of the gap introduced. 5048if (ViaEltSize > NumElts)
5055// Shift up the reversed bits if the vector didn't exactly fit into the larger 5057if (ViaEltSize > NumElts)
5063if (ViaEltSize > NumElts)
5072MVT &RotateVT,
unsigned &RotateAmt) {
5080 NumElts, NumSubElts, RotateAmt))
5083 NumElts / NumSubElts);
5085// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. 5089// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can 5090// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this 5091// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. 5106// A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, 5107// so canonicalize to vrev8. 5117// If compiling with an exactly known VLEN, see if we can split a 5118// shuffle on m2 or larger into a small number of m1 sized shuffles 5119// which write each destination registers exactly once. 5129// If we don't know exact data layout, not much we can do. If this 5130// is already m1 or smaller, no point in splitting further. 5135// Avoid picking up bitrotate patterns which we have a linear-in-lmul 5151unsigned NumOfSrcRegs = NumElts / NumOpElts;
5152unsigned NumOfDestRegs = NumElts / NumOpElts;
5153// The following semantically builds up a fixed length concat_vector 5154// of the component shuffle_vectors. We eagerly lower to scalable here 5155// to avoid DAG combining it back to a large shuffle_vector again. 5161 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5163 [&](
ArrayRef<int> SrcSubMask,
unsigned SrcVecIdx,
unsigned DstVecIdx) {
5164Operands.emplace_back().emplace_back(
5165 SrcVecIdx, UINT_MAX,
5168 [&](
ArrayRef<int> SrcSubMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
5174assert(
Operands.size() == NumOfDestRegs &&
"Whole vector must be processed");
// Note: check that we do not emit too many shuffles here to prevent code
// bloat.
// TODO: investigate if it can be improved by extra analysis of the masks
// to check if the code is more profitable.
unsigned NumShuffles = std::accumulate(
5186 for (const auto &P : Data) {
5187 unsigned Idx2 = std::get<1>(P);
5188 ArrayRef<int> Mask = std::get<2>(P);
5189 if (Idx2 != UINT_MAX)
5191 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5196if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5197 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5199auto ExtractValue = [&, &DAG = DAG](
SDValue SrcVec,
unsigned ExtractIdx) {
5201 DAG.getVectorIdxConstant(ExtractIdx,
DL));
5205auto PerformShuffle = [&, &DAG = DAG](
SDValue SubVec1,
SDValue SubVec2,
5207SDValue SubVec = DAG.getVectorShuffle(OneRegVT,
DL, SubVec1, SubVec2, Mask);
5210SDValue Vec = DAG.getUNDEF(ContainerVT);
5215for (
unsignedI : seq<unsigned>(
Data.size())) {
5216constauto &[Idx1, Idx2,
_] =
Data[
I];
5219"Expected both indices to be extracted already.");
5222SDValueV = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5223 (Idx1 % NumOfSrcRegs) * NumOpElts);
5225if (Idx2 != UINT_MAX)
5226 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5227 (Idx2 % NumOfSrcRegs) * NumOpElts);
5230for (
constauto &[Idx1, Idx2, Mask] :
Data) {
5232SDValueV2 = Idx2 == UINT_MAX ? V1 : Values.
at(Idx2);
5233V = PerformShuffle(V1, V2, Mask);
5237unsigned InsertIdx =
I * NumOpElts;
5240 DAG.getVectorIdxConstant(InsertIdx,
DL));
5245// Matches a subset of compress masks with a contiguous prefix of output 5246// elements. This could be extended to allow gaps by deciding which 5247// source elements to spuriously demand. 5250bool SawUndef =
false;
5251for (
unsigned i = 0; i < Mask.size(); i++) {
5258if (i > (
unsigned)Mask[i])
5267/// Given a shuffle where the indices are disjoint between the two sources, 5270/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4> 5272/// Merge the two sources into one and do a single source shuffle: 5274/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1> 5275/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0> 5277/// A vselect will either be merged into a masked instruction or be lowered as a 5278/// vmerge.vvm, which is cheaper than a vrgather.vv. 5288// Work out which source each lane will come from. 5291for (
intIdx : Mask) {
5294unsigned SrcIdx =
Idx % Mask.size();
5296if (Srcs[SrcIdx] == -1)
5297// Mark this source as using this lane. 5299elseif (Srcs[SrcIdx] != Src)
5300// The other source is using this lane: not disjoint. 5305for (
int Lane : Srcs) {
5316// Move all indices relative to the first source. 5318for (
unsignedI = 0;
I < Mask.size();
I++) {
5322 NewMask[
I] = Mask[
I] % Mask.size();
5328/// Try to widen element type to get a new mask value for a better permutation 5329/// sequence. This doesn't try to inspect the widened mask for profitability; 5330/// we speculate the widened form is equal or better. This has the effect of 5331/// reducing mask constant sizes - allowing cheaper materialization sequences 5332/// - and index sequence sizes - reducing register pressure and materialization 5333/// cost, at the cost of (possibly) an extra VTYPE toggle. 5336MVT VT =
Op.getSimpleValueType();
5343// Avoid wasted work leading to isTypeLegal check failing below 5344if (ElementSize > 32)
5367MVT VT =
Op.getSimpleValueType();
5372// Lower to a vror.vi of a larger element type if possible before we promote 5379// Promote i1 shuffle to i8 shuffle. 5382 V2 = V2.isUndef() ? DAG.
getUNDEF(WidenVT)
5398// Turn splatted vector load into a strided load with an X0 stride. 5400// Peek through CONCAT_VECTORS as VectorCombine can concat a vector 5402// FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 5406 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5407 V = V.getOperand(
Offset / OpElements);
5411// We need to ensure the load isn't atomic or volatile. 5413auto *Ld = cast<LoadSDNode>(V);
5418// If this is SEW=64 on RV32, use a strided load with a stride of x0. 5423SDValue Ops[] = {Ld->getChain(),
5437MVT SplatVT = ContainerVT;
5439// f16 with zvfhmin and bf16 need to use an integer scalar load. 5440if (SVT == MVT::bf16 ||
5441 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5446// Otherwise use a scalar load and splat. This will give the best 5447// opportunity to fold a splat into the operation. ISel can turn it into 5448// the x0 strided load if we aren't able to fold away the select. 5450 V = DAG.
getLoad(SVT,
DL, Ld->getChain(), NewAddr,
5451 Ld->getPointerInfo().getWithOffset(
Offset),
5452 Ld->getOriginalAlign(),
5456 Ld->getPointerInfo().getWithOffset(
Offset), SVT,
5457 Ld->getOriginalAlign(),
5458 Ld->getMemOperand()->getFlags());
5470assert(Lane < (
int)NumElts &&
"Unexpected lane!");
5473 DAG.
getUNDEF(ContainerVT), TrueMask, VL);
5478// For exact VLEN m2 or greater, try to split to m1 operations if we 5479// can split cleanly. 5493// A bitrotate will be one instruction on Zvkb, so try to lower to it first if 5495if (Subtarget.hasStdExtZvkb())
5499// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 5500// be undef which can be handled with a single SLIDEDOWN/UP. 5506 LoV = LoSrc == 0 ? V1 : V2;
5510 HiV = HiSrc == 0 ? V1 : V2;
5514// We found a rotation. We need to slide HiV down by Rotation. Then we need 5515// to slide LoV up by (NumElts - Rotation). 5516unsigned InvRotate = NumElts - Rotation;
5520// Even though we could use a smaller VL, don't to avoid a vsetivli 5536// If this is a deinterleave(2,4,8) and we can widen the vector, then we can 5537// use shift and truncate to perform the shuffle. 5538// TODO: For Factor=6, we can perform the first step of the deinterleave via 5539// shift-and-trunc reducing total cost for everything except an mf8 result. 5540// TODO: For Factor=4,8, we can do the same when the ratio isn't high enough 5541// to do the entire operation. 5544assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5545for (
unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5559// Detect an interleave shuffle and lower to 5560// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 5563// Extract the halves of the vectors. 5566// Recognize if one half is actually undef; the matching above will 5567// otherwise reuse the even stream for the undef one. This improves 5568// spread(2) shuffles. 5569bool LaneIsUndef[2] = {
true,
true};
5570for (
unsigned i = 0; i < Mask.size(); i++)
5571 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5573intSize = Mask.size();
5575if (LaneIsUndef[0]) {
5578assert(EvenSrc >= 0 &&
"Undef source?");
5579 EvenV = (EvenSrc /
Size) == 0 ? V1 : V2;
5584if (LaneIsUndef[1]) {
5587assert(OddSrc >= 0 &&
"Undef source?");
5588 OddV = (OddSrc /
Size) == 0 ? V1 : V2;
5597// Handle any remaining single source shuffles 5598assert(!V1.
isUndef() &&
"Unexpected shuffle canonicalization");
5600// We might be able to express the shuffle as a bitrotate. But even if we 5601// don't have Zvkb and have to expand, the expanded sequence of approx. 2 5602// shifts and a vor will have a higher throughput than a vrgather. 5606// Before hitting generic lowering fallbacks, try to widen the mask 5611// Can we generate a vcompress instead of a vrgather? These scale better 5612// at high LMUL, at the cost of not being able to fold a following select 5613// into them. The mask constants are also smaller than the index vector 5614// constants, and thus easier to materialize. 5618for (
autoIdx : Mask) {
5630// Match a spread(4,8) which can be done via extend and shift. Spread(2) 5631// is fully covered in interleave(2) above, so it is ignored here. 5634assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5635for (
unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5648any_of(Mask, [&](
constauto &
Idx) {
returnIdx > 255; })) {
5649// On such a vector we're unable to use i8 as the index type. 5650// FIXME: We could promote the index to i16 and use vrgatherei16, but that 5651// may involve vector splitting if we're already at LMUL=8, or our 5652// user-supplied maximum fixed-length LMUL. 5656// Base case for the two operand recursion below - handle the worst case 5657// single source shuffle. 5660// Since we can't introduce illegal index types at this stage, use i16 and 5661// vrgatherei16 if the corresponding index type for plain vrgather is greater 5668// If the mask allows, we can do all the index computation in 16 bits. This 5669// requires less work and less register pressure at high LMUL, and creates 5670// smaller constants which may be cheaper to materialize. 5677MVT IndexContainerVT =
5682for (
int MaskIndex : Mask) {
5683bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5692 DAG.
getUNDEF(ContainerVT), TrueMask, VL);
5696// As a backup, shuffles can be lowered via a vrgather instruction, possibly 5697// merged with a second vrgather. 5700// Now construct the mask that will be used by the blended vrgather operation. 5701// Construct the appropriate indices into each vector. 5702for (
int MaskIndex : Mask) {
5703bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5704 ShuffleMaskLHS.
push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5706 ShuffleMaskRHS.
push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5709// If the mask indices are disjoint between the two sources, we can lower it 5710// as a vselect + a single source vrgather.vv. Don't do this if we think the 5711// operands may end up being lowered to something cheaper than a vrgather.vv. 5720// Before hitting generic lowering fallbacks, try to widen the mask 5725// Try to pick a profitable operand order. 5729// Recursively invoke lowering for each operand if we had two 5730// independent single source shuffles, and then combine the result via a 5731// vselect. Note that the vselect will likely be folded back into the 5732// second permute (vrgather, or other) by the post-isel combine. 5737for (
int MaskIndex : Mask) {
5738bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5742assert(MaskVals.
size() == NumElts &&
"Unexpected select-like shuffle");
5752// Only support legal VTs for other shuffles for now. 5756// Support splats for any type. These should type legalize well. 5762// Not for i1 vectors. 5771// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 5774RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(
SDValueOp,
5776MVT VT =
Op.getSimpleValueType();
5780MVT ContainerVT = VT;
5783if (
Op->isVPOpcode()) {
5784 Mask =
Op.getOperand(1);
5788 VL =
Op.getOperand(2);
5791// We choose FP type that can represent the value if possible. Otherwise, we 5792// use rounding to zero conversion for correct exponent of the result. 5793// TODO: Use f16 for i8 when possible? 5794MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5796 FloatEltVT = MVT::f32;
5799// Legal types should have been checked in the RISCVTargetLowering 5801// TODO: Splitting may make sense in some cases. 5803"Expected legal float type!");
5805// For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 5806// The trailing zero count is equal to log2 of this single bit value. 5810 }
elseif (
Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5813 Src = DAG.
getNode(ISD::VP_AND,
DL, VT, Src, Neg, Mask, VL);
5816// We have a legal FP type, convert to it. 5819if (
Op->isVPOpcode())
5820 FloatVal = DAG.
getNode(ISD::VP_UINT_TO_FP,
DL, FloatVT, Src, Mask, VL);
5824// Use RTZ to avoid rounding influencing exponent of FloatVal. 5829if (!
Op->isVPOpcode())
5833MVT ContainerFloatVT =
5836 Src, Mask, RTZRM, VL);
5840// Bitcast to integer and shift the exponent to the LSB. 5843unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5846// Restore back to original type. Truncation after SRL is to generate vnsrl. 5847if (
Op->isVPOpcode()) {
5860// The exponent contains log2 of the value in biased form. 5861unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5862// For trailing zeros, we just need to subtract the bias. 5866if (
Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5867return DAG.
getNode(ISD::VP_SUB,
DL, VT, Exp,
5870// For leading zeros, we need to remove the bias and convert from log2 to 5871// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 5872unsigned Adjust = ExponentBias + (EltSize - 1);
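// Illustrative sketch of the exponent trick in scalar form (32-bit element,
// f32; assumes <cstring> for memcpy):
//   uint32_t X = 80;                            // floor(log2(80)) == 6
//   float F = (float)X;                         // exponent field holds 6 + 127
//   uint32_t Bits; memcpy(&Bits, &F, sizeof(Bits));
//   unsigned Ctlz = (127 + 31) - (Bits >> 23);  // 158 - 133 == 25 == clz(80)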
5874if (
Op->isVPOpcode())
5880// The above result with zero input equals to Adjust which is greater than 5881// EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 5884elseif (
Op.getOpcode() == ISD::VP_CTLZ)
5885 Res = DAG.
getNode(ISD::VP_UMIN,
DL, VT, Res,
5904 SrcVT = ContainerVT;
5907// Convert to boolean vector. 5917if (
Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5918// In this case, we can interpret poison as -1, so nothing to do further. 5928// While RVV has alignment restrictions, we should always be able to load as a 5929// legal equivalently-sized byte-typed vector instead. This method is 5930// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 5931// the load is already correctly-aligned, it returns SDValue(). 5934auto *
Load = cast<LoadSDNode>(
Op);
5935assert(Load &&
Load->getMemoryVT().isVector() &&
"Expected vector load");
5939 *
Load->getMemOperand()))
5943MVT VT =
Op.getSimpleValueType();
5945assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5946"Unexpected unaligned RVV load type");
5950"Expecting equally-sized RVV vector types to be legal");
5952Load->getPointerInfo(),
Load->getOriginalAlign(),
5953Load->getMemOperand()->getFlags());
5957// While RVV has alignment restrictions, we should always be able to store as a 5958// legal equivalently-sized byte-typed vector instead. This method is 5959// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 5960// returns SDValue() if the store is already correctly aligned. 5963auto *
Store = cast<StoreSDNode>(
Op);
5964assert(Store &&
Store->getValue().getValueType().isVector() &&
5965"Expected vector store");
5968Store->getMemoryVT(),
5969 *
Store->getMemOperand()))
5976assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5977"Unexpected unaligned RVV store type");
5981"Expecting equally-sized RVV vector types to be legal");
5982 StoredVal = DAG.
getBitcast(NewVT, StoredVal);
5984Store->getPointerInfo(),
Store->getOriginalAlign(),
5985Store->getMemOperand()->getFlags());
5990assert(
Op.getValueType() == MVT::i64 &&
"Unexpected VT");
5992 int64_t Imm = cast<ConstantSDNode>(
Op)->getSExtValue();
5994// All simm32 constants should be handled by isel. 5995// NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 5996// this check redundant, but small immediates are common so this check 5997// should have better compile time. 6001// We only need to cost the immediate, if constant pool lowering is enabled. 6009// Optimizations below are disabled for opt size. If we're optimizing for 6010// size, use a constant pool. 6014// Special case. See if we can build the constant as (ADD (SLLI X, C), X) do 6015// that if it will avoid a constant pool. 6016// It will require an extra temporary register though. 6017// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 6018// low and high 32 bits are the same and bit 31 and 63 are set. 6019unsigned ShiftAmt, AddOpc;
6030MVT VT =
Op.getSimpleValueType();
6031constAPFloat &
Imm = cast<ConstantFPSDNode>(
Op)->getValueAPF();
6033// Can this constant be selected by a Zfa FLI instruction? 6037// If the constant is negative, try negating. 6038if (Index < 0 &&
Imm.isNegative()) {
6043// If we couldn't find a FLI lowering, fall back to generic code. 6047// Emit an FLI+FNEG. We use a custom node to hide from constant folding. 6066if (Subtarget.hasStdExtZtso()) {
6067// The only fence that needs an instruction is a sequentially-consistent 6068// cross-thread fence. 6073// MEMBARRIER is a compiler barrier; it codegens to a no-op. 6077// singlethread fences only synchronize with signal handlers on the same 6078// thread and thus only need to preserve instruction order, not actually 6079// enforce memory ordering. 6081// MEMBARRIER is a compiler barrier; it codegens to a no-op. 6090MVT VT =
Op.getSimpleValueType();
6092unsignedCheck =
Op.getConstantOperandVal(1);
6093unsigned TDCMask = 0;
6121MVT VT0 =
Op.getOperand(0).getSimpleValueType();
6126if (
Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6128 VL =
Op.getOperand(3);
6146if (
Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6148MVT MaskContainerVT =
6151 VL =
Op.getOperand(3);
6156 Mask, VL,
Op->getFlags());
6159 DAG.
getUNDEF(ContainerDstVT), TDCMaskV, VL);
6164 DAG.
getUNDEF(ContainerVT), Mask, VL});
6168 TDCMaskV, DAG.
getUNDEF(ContainerDstVT), Mask, VL);
6172 DAG.
getUNDEF(ContainerDstVT), SplatZero, VL);
6176 DAG.
getUNDEF(ContainerVT), Mask, VL});
6187// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these 6188// operations propagate nans. 6192MVT VT =
Op.getSimpleValueType();
6200// If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This 6201// ensures that when one input is a nan, the other will also be a nan 6202// allowing the nan to propagate. If both inputs are nan, this will swap the 6203// inputs which is harmless. 6219return DAG.
getNode(Opc,
DL, VT, NewX, NewY);
6222// Check no NaNs before converting to fixed vector scalable. 6226MVT ContainerVT = VT;
6234if (
Op->isVPOpcode()) {
6235 Mask =
Op.getOperand(2);
6239 VL =
Op.getOperand(3);
6247 {X, X, DAG.getCondCode(ISD::SETOEQ),
6248 DAG.getUNDEF(ContainerVT), Mask, VL});
6256 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6257 DAG.getUNDEF(ContainerVT), Mask, VL});
6267 DAG.
getUNDEF(ContainerVT), Mask, VL);
6277"Wrong opcode for lowering FABS or FNEG.");
6280MVT VT =
Op.getSimpleValueType();
6281assert((VT == MVT::f16 || VT == MVT::bf16) &&
"Unexpected type");
6288 Mask = Mask.sext(Subtarget.
getXLen());
6301MVT VT =
Op.getSimpleValueType();
6302assert((VT == MVT::f16 || VT == MVT::bf16) &&
"Unexpected type");
6309// Get sign bit into an integer value. 6312if (SignSize == Subtarget.
getXLen()) {
6314 }
elseif (SignSize == 16) {
6316 }
elseif (SignSize == 32) {
6318 }
elseif (SignSize == 64) {
6319assert(XLenVT == MVT::i32 &&
"Unexpected type");
6320// Copy the upper word to integer. 6327// Get the signbit at the right position for MagAsInt. 6329if (ShiftAmount > 0) {
6332 }
elseif (ShiftAmount < 0) {
6337// Mask the sign bit and any bits above it. The extra bits will be dropped 6338// when we convert back to FP. 6343// Transform Mag value to integer, and clear the sign bit. 6356/// Get a RISC-V target specified VL op for a given SDNode. 6358#define OP_CASE(NODE) \ 6360 return RISCVISD::NODE##_VL; 6361#define VP_CASE(NODE) \ 6362 case ISD::VP_##NODE: \ 6363 return RISCVISD::NODE##_VL; 6365switch (
Op.getOpcode()) {
6429VP_CASE(FCOPYSIGN)
// VP_FCOPYSIGN 6431VP_CASE(SINT_TO_FP)
// VP_SINT_TO_FP 6432VP_CASE(UINT_TO_FP)
// VP_UINT_TO_FP 6433VP_CASE(BITREVERSE)
// VP_BITREVERSE 6443case ISD::VP_CTLZ_ZERO_UNDEF:
6446case ISD::VP_CTTZ_ZERO_UNDEF:
6455if (
Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6460if (
Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6465if (
Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6477case ISD::VP_SIGN_EXTEND:
6479case ISD::VP_ZERO_EXTEND:
6481case ISD::VP_FP_TO_SINT:
6483case ISD::VP_FP_TO_UINT:
6486case ISD::VP_FMINNUM:
6489case ISD::VP_FMAXNUM:
6502/// Return true if a RISC-V target specified op has a passthru operand. 6506"not a RISC-V target specific op");
6510"adding target specific op should update this function");
6526/// Return true if a RISC-V target specified op has a mask operand. 6530"not a RISC-V target specific op");
6534"adding target specific op should update this function");
6547if (
Op.getValueType() == MVT::nxv32f16 &&
6551if (
Op.getValueType() == MVT::nxv32bf16)
6564if (!
Op.getOperand(j).getValueType().isVector()) {
6565 LoOperands[j] =
Op.getOperand(j);
6566 HiOperands[j] =
Op.getOperand(j);
6569 std::tie(LoOperands[j], HiOperands[j]) =
6574 DAG.
getNode(
Op.getOpcode(),
DL, LoVT, LoOperands,
Op->getFlags());
6576 DAG.
getNode(
Op.getOpcode(),
DL, HiVT, HiOperands,
Op->getFlags());
6591 std::tie(LoOperands[j], HiOperands[j]) =
6595if (!
Op.getOperand(j).getValueType().isVector()) {
6596 LoOperands[j] =
Op.getOperand(j);
6597 HiOperands[j] =
Op.getOperand(j);
6600 std::tie(LoOperands[j], HiOperands[j]) =
6605 DAG.
getNode(
Op.getOpcode(),
DL, LoVT, LoOperands,
Op->getFlags());
6607 DAG.
getNode(
Op.getOpcode(),
DL, HiVT, HiOperands,
Op->getFlags());
6617auto [EVLLo, EVLHi] =
6618 DAG.
SplitEVL(
Op.getOperand(3),
Op.getOperand(1).getValueType(),
DL);
6622 {Op.getOperand(0), Lo, MaskLo, EVLLo},
Op->getFlags());
6624 {ResLo, Hi, MaskHi, EVLHi},
Op->getFlags());
6642if (!
Op.getOperand(j).getValueType().isVector()) {
6643 LoOperands[j] =
Op.getOperand(j);
6644 HiOperands[j] =
Op.getOperand(j);
6647 std::tie(LoOperands[j], HiOperands[j]) =
6652 DAG.
getNode(
Op.getOpcode(),
DL, LoVTs, LoOperands,
Op->getFlags());
6655 DAG.
getNode(
Op.getOpcode(),
DL, HiVTs, HiOperands,
Op->getFlags());
6664switch (
Op.getOpcode()) {
6670return lowerGlobalAddress(
Op, DAG);
6672return lowerBlockAddress(
Op, DAG);
6674return lowerConstantPool(
Op, DAG);
6676return lowerJumpTable(
Op, DAG);
6678return lowerGlobalTLSAddress(
Op, DAG);
6682return lowerConstantFP(
Op, DAG);
6684return lowerSELECT(
Op, DAG);
6686return lowerBRCOND(
Op, DAG);
6688return lowerVASTART(
Op, DAG);
6690return lowerFRAMEADDR(
Op, DAG);
6692return lowerRETURNADDR(
Op, DAG);
6694return lowerShiftLeftParts(
Op, DAG);
6696return lowerShiftRightParts(
Op, DAG,
true);
6698return lowerShiftRightParts(
Op, DAG,
false);
6701if (
Op.getValueType().isFixedLengthVector()) {
6702assert(Subtarget.hasStdExtZvkb());
6703return lowerToScalableOp(
Op, DAG);
6705assert(Subtarget.hasVendorXTHeadBb() &&
6706 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6707"Unexpected custom legalization");
6708// XTHeadBb only supports rotate by constant. 6709if (!isa<ConstantSDNode>(
Op.getOperand(1)))
6714EVT VT =
Op.getValueType();
6718if (Op0VT == MVT::i16 &&
6720 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6724if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.
is64Bit() &&
6729if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.
is64Bit() &&
6736// Consider other scalar<->scalar casts as legal if the types are legal. 6737// Otherwise expand them. 6748// We can handle fixed length vector bitcasts with a simple replacement 6752// When bitcasting from scalar to fixed-length vector, insert the scalar 6753// into a one-element vector of the result type, and perform a vector 6765// Custom-legalize bitcasts from fixed-length vector types to scalar types 6766// thus: bitcast the vector to a one-element vector type whose element type 6767// is the same as the result type, and extract the first element. 6779return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
6781return LowerINTRINSIC_W_CHAIN(
Op, DAG);
6783return LowerINTRINSIC_VOID(
Op, DAG);
6785return LowerIS_FPCLASS(
Op, DAG);
6787MVT VT =
Op.getSimpleValueType();
6789assert(Subtarget.hasStdExtZvbb());
6790return lowerToScalableOp(
Op, DAG);
6793assert(Subtarget.hasStdExtZbkb() &&
"Unexpected custom legalization");
6795// Expand bitreverse to a bswap(rev8) followed by brev8. 6802// Only custom-lower vector truncates 6803if (!
Op.getSimpleValueType().isVector())
6805return lowerVectorTruncLike(
Op, DAG);
6808if (
Op.getOperand(0).getValueType().isVector() &&
6809Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6810return lowerVectorMaskExt(
Op, DAG,
/*ExtVal*/ 1);
6813if (
Op.getOperand(0).getValueType().isVector() &&
6814Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6815return lowerVectorMaskExt(
Op, DAG,
/*ExtVal*/ -1);
6818return lowerSPLAT_VECTOR_PARTS(
Op, DAG);
6820return lowerINSERT_VECTOR_ELT(
Op, DAG);
6822return lowerEXTRACT_VECTOR_ELT(
Op, DAG);
6824MVT VT =
Op.getSimpleValueType();
6832MVT ContainerVT = VT;
6840 DAG.
getUNDEF(ContainerVT), Scalar, VL);
6844 DAG.
getUNDEF(ContainerVT), Scalar, VL);
6852MVT VT =
Op.getSimpleValueType();
6855// We define our scalable vector types for lmul=1 to use a 64 bit known 6856// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 6857// vscale as VLENB / 8. 6861// We assume VLENB is a multiple of 8. We manually choose the best shift 6862// here because SimplifyDemandedBits isn't always able to simplify it. 6872 }
elseif ((Val % 8) == 0) {
6873// If the multiplier is a multiple of 8, scale it down to avoid needing 6874// to shift the VLENB value. 6886// Custom promote f16 powi with illegal i32 integer type on RV64. Once 6887// promoted this will be legalized into a libcall by LegalizeIntegerTypes. 6888if (
Op.getValueType() == MVT::f16 && Subtarget.
is64Bit() &&
6889Op.getOperand(1).getValueType() == MVT::i32) {
6906return lowerVectorFPExtendOrRoundLike(
Op, DAG);
6909return lowerStrictFPExtendOrRoundLike(
Op, DAG);
6912if (
Op.getValueType().isVector() &&
6913 ((
Op.getValueType().getScalarType() == MVT::f16 &&
6916Op.getValueType().getScalarType() == MVT::bf16)) {
6932 Op1.getValueType().isVector() &&
6933 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6936 Op1.getValueType().getScalarType() == MVT::bf16)) {
6942 Op1.getValueType().getVectorElementCount());
6945return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), WidenVec);
6952// RVV can only do fp<->int conversions to types half/double the size as 6953// the source. We custom-lower any conversions that do two hops into 6955MVT VT =
Op.getSimpleValueType();
6958bool IsStrict =
Op->isStrictFPOpcode();
6959SDValue Src =
Op.getOperand(0 + IsStrict);
6960MVT SrcVT = Src.getSimpleValueType();
6971"Unexpected vector element types");
6974// Widening conversions 6975if (EltSize > (2 * SrcEltSize)) {
6977// Do a regular integer sign/zero extension then convert to float. 6987Op.getOperand(0), Ext);
6991assert(SrcEltVT == MVT::f16 &&
"Unexpected FP_TO_[US]INT lowering");
6992// Do one doubling fp_extend then complete the operation by converting 6998return DAG.
getNode(
Op.getOpcode(),
DL,
Op->getVTList(), Chain, FExt);
7004// Narrowing conversions 7005if (SrcEltSize > (2 * EltSize)) {
7007// One narrowing int_to_fp, then an fp_round. 7008assert(EltVT == MVT::f16 &&
"Unexpected [US]_TO_FP lowering");
7013Op.getOperand(0), Src);
7021// One narrowing fp_to_int, then truncate the integer. If the float isn't 7022// representable by the integer, the result is poison. 7028Op.getOperand(0), Src);
7036// Scalable vectors can exit here. Patterns will handle equally-sized 7037// conversions halving/doubling ones. 7041// For fixed-length vectors we lower to a custom "VL" node. 7043switch (
Op.getOpcode()) {
7075"Expected same element count");
7082Op.getOperand(0), Src, Mask, VL);
7086 Src = DAG.
getNode(RVVOpc,
DL, ContainerVT, Src, Mask, VL);
7093// Custom lower to ensure the libcall return is passed in an FPR on hard 7108MVT VT =
Op.getSimpleValueType();
7116// fp_extend if the target VT is bigger than f32. 7123// Custom lower to ensure the libcall return is passed in an FPR on hard 7128bool IsStrict =
Op->isStrictFPOpcode();
7129SDValue Op0 = IsStrict ?
Op.getOperand(1) :
Op.getOperand(0);
7133 std::tie(Res, Chain) =
7144// Custom lower to ensure the libcall argument is passed in an FPR on hard 7149bool IsStrict =
Op->isStrictFPOpcode();
7150SDValue Op0 = IsStrict ?
Op.getOperand(1) :
Op.getOperand(0);
7156 std::tie(Res, Chain) =
makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7157 CallOptions,
DL, Chain);
7174if (
Op.getValueType().isVector())
7179assert(
Op.getOperand(0).getValueType() == MVT::f16 &&
7180"Unexpected custom legalisation");
7183return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
7189assert(
Op.getOperand(1).getValueType() == MVT::f16 &&
7190"Unexpected custom legalisation");
7193 {
Op.getOperand(0),
Op.getOperand(1)});
7194return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
7195 {Ext.getValue(1), Ext.getValue(0)});
7202return lowerVECREDUCE(
Op, DAG);
7206if (
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7207return lowerVectorMaskVecReduction(
Op, DAG,
/*IsVP*/false);
7208return lowerVECREDUCE(
Op, DAG);
7215return lowerFPVECREDUCE(
Op, DAG);
7216case ISD::VP_REDUCE_ADD:
7217case ISD::VP_REDUCE_UMAX:
7218case ISD::VP_REDUCE_SMAX:
7219case ISD::VP_REDUCE_UMIN:
7220case ISD::VP_REDUCE_SMIN:
7221case ISD::VP_REDUCE_FADD:
7222case ISD::VP_REDUCE_SEQ_FADD:
7223case ISD::VP_REDUCE_FMIN:
7224case ISD::VP_REDUCE_FMAX:
7225case ISD::VP_REDUCE_FMINIMUM:
7226case ISD::VP_REDUCE_FMAXIMUM:
7229return lowerVPREDUCE(
Op, DAG);
7230case ISD::VP_REDUCE_AND:
7231case ISD::VP_REDUCE_OR:
7232case ISD::VP_REDUCE_XOR:
7233if (
Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7234return lowerVectorMaskVecReduction(
Op, DAG,
/*IsVP*/true);
7235return lowerVPREDUCE(
Op, DAG);
7236case ISD::VP_CTTZ_ELTS:
7237case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7238return lowerVPCttzElements(
Op, DAG);
                             DAG.getUNDEF(ContainerVT), DAG, Subtarget);
    return lowerINSERT_SUBVECTOR(Op, DAG);
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
    return lowerVECTOR_DEINTERLEAVE(Op, DAG);
    return lowerVECTOR_INTERLEAVE(Op, DAG);
    return lowerSTEP_VECTOR(Op, DAG);
    return lowerVECTOR_REVERSE(Op, DAG);
    return lowerVECTOR_SPLICE(Op, DAG);
    MVT VT = Op.getSimpleValueType();
    if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
        EltVT == MVT::bf16) {
      if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
          (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
    if (EltVT == MVT::i1)
      return lowerVectorMaskSplat(Op, DAG);
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    MVT VT = Op.getSimpleValueType();
    MVT ContainerVT = VT;
    // Recursively split concat_vectors with more than 2 operands:
    //
    //   concat_vector op1, op2, op3, op4
    // ->
    //   concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
    //
    // This reduces the length of the chain of vslideups and allows us to
    // perform the vslideups at a smaller LMUL, limited to MF2.
                                 Op->ops().take_front(HalfNumOps));
                                 Op->ops().drop_front(HalfNumOps));
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
      SDValue SubVec = OpIdx.value();
      // Don't insert undef subvectors.
Op);
7329EVT VecTy = Load->getMemoryVT();
7330// Handle normal vector tuple load. 7336unsigned NumElts = Sz / (NF * 8);
7337int Log2LMUL =
Log2_64(NumElts) - 3;
7340 Flag.setNoUnsignedWrap(
true);
7342SDValue BasePtr = Load->getBasePtr();
7349// Load NF vector registers and combine them to a vector tuple. 7350for (
unsigned i = 0; i < NF; ++i) {
7363if (
auto V = expandUnalignedRVVLoad(
Op, DAG))
7365if (
Op.getValueType().isFixedLengthVector())
7366return lowerFixedLengthVectorLoadToRVV(
Op, DAG);
    auto *Store = cast<StoreSDNode>(Op);
    SDValue StoredVal = Store->getValue();
    // Handle normal vector tuple store.
      unsigned NumElts = Sz / (NF * 8);
      int Log2LMUL = Log2_64(NumElts) - 3;
      Flag.setNoUnsignedWrap(true);
      SDValue Chain = Store->getChain();
      SDValue BasePtr = Store->getBasePtr();
      // Extract subregisters in a vector tuple and store them individually.
      for (unsigned i = 0; i < NF; ++i) {
        Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
                           Store->getOriginalAlign(),
                           Store->getMemOperand()->getFlags());
        Chain = Ret.getValue(0);
    if (auto V = expandUnalignedRVVStore(Op, DAG))
    if (Op.getOperand(1).getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return lowerMaskedLoad(Op, DAG);
    return lowerMaskedStore(Op, DAG);
    return lowerVectorCompress(Op, DAG);
    // This occurs because we custom legalize SETGT and SETUGT for setcc. That
    // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
    // into separate SETCC+SELECT just like LegalizeDAG.
    EVT VT = Op.getValueType();
    MVT OpVT = Op.getOperand(0).getSimpleValueType();
    MVT VT = Op.getSimpleValueType();
           "Unexpected CondCode");
    // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
    // convert this to the equivalent of (set(u)ge X, C+1) by using
    // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
    // into a register.
    if (isa<ConstantSDNode>(RHS)) {
      int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
      if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
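        // Illustrative sketch (not from the original source): with this trick
        // a compare such as (setgt X, 5) can be emitted as
        //   slti  a1, a0, 6
        //   xori  a1, a1, 1
        // i.e. !(X < 6) == (X >= 6) == (X > 5), with X assumed to be in a0.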
        // If this is an unsigned compare and the constant is -1, incrementing
        // the constant would change behavior. The result should be false.
        // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
    // Not a constant we could handle, swap the operands and condition code to
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
    return lowerToScalableOp(Op, DAG);
    if (Op.getSimpleValueType().isFixedLengthVector())
      return lowerToScalableOp(Op, DAG);
    // This can be called for an i32 shift amount that needs to be promoted.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
    return lowerToScalableOp(Op, DAG);
    EVT VT = Op->getValueType(0);
    // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
    // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
    return lowerABS(Op, DAG);
    if (Subtarget.hasStdExtZvbb())
      return lowerToScalableOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
    if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
    return lowerToScalableOp(Op, DAG);
    return lowerVectorStrictFSetcc(Op, DAG);
    return lowerMaskedGather(Op, DAG);
  case ISD::VP_SCATTER:
    return lowerMaskedScatter(Op, DAG);
    return lowerGET_ROUNDING(Op, DAG);
    return lowerSET_ROUNDING(Op, DAG);
    return lowerEH_DWARF_CFA(Op, DAG);
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPMergeMask(Op, DAG);
  case ISD::VP_UADDSAT:
  case ISD::VP_USUBSAT:
  case ISD::VP_SADDSAT:
  case ISD::VP_SSUBSAT:
    return lowerVPOp(Op, DAG);
    return lowerLogicVPOp(Op, DAG);
  case ISD::VP_FMINNUM:
  case ISD::VP_FMAXNUM:
  case ISD::VP_FCOPYSIGN:
    return lowerVPOp(Op, DAG);
  case ISD::VP_IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPExtMaskOp(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::VP_TRUNCATE:
    return lowerVectorTruncLike(Op, DAG);
  case ISD::VP_FP_EXTEND:
  case ISD::VP_FP_ROUND:
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  case ISD::VP_SINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        ((Op.getValueType().getScalarType() == MVT::f16 &&
          Op.getValueType().getScalarType() == MVT::bf16)) {
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
        Op1.getValueType().isVector() &&
        ((Op1.getValueType().getScalarType() == MVT::f16 &&
          Op1.getValueType().getScalarType() == MVT::bf16)) {
                             Op1.getValueType().getVectorElementCount());
                          {WidenVec, Op.getOperand(1), Op.getOperand(2)});
    return lowerVPFPIntConvOp(Op, DAG);
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPSetCCMaskOp(Op, DAG);
  case ISD::VP_BITREVERSE:
    return lowerVPOp(Op, DAG);
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    return lowerVPStridedLoad(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    return lowerVPStridedStore(Op, DAG);
  case ISD::VP_FNEARBYINT:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
  case ISD::VP_FMAXIMUM:
  case ISD::VP_FMINIMUM:
  case ISD::EXPERIMENTAL_VP_SPLICE:
    return lowerVPSpliceExperimental(Op, DAG);
  case ISD::EXPERIMENTAL_VP_REVERSE:
    return lowerVPReverseExperimental(Op, DAG);
  case ISD::EXPERIMENTAL_VP_SPLAT:
    return lowerVPSplatExperimental(Op, DAG);
           "llvm.clear_cache only needs custom lower on Linux targets");
    return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
                           Op.getOperand(2), Flags, DL);
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
    return lowerINIT_TRAMPOLINE(Op, DAG);
    return lowerADJUST_TRAMPOLINE(Op, DAG);
  MakeLibCallOptions CallOptions;
  std::pair<SDValue, SDValue> CallResult =
      makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
                  {Start, End, Flags}, CallOptions, DL, InChain);
  // This function returns void so only the out chain matters.
  return CallResult.second;
  // Create an MCCodeEmitter to encode instructions.
  std::unique_ptr<MCCodeEmitter> CodeEmitter(
  SDValue Trmp = Op.getOperand(1); // trampoline
  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  // We store in the trampoline buffer the following instructions and data.
  // 16: <StaticChainOffset>
  // 24: <FunctionAddressOffset>
  constexpr unsigned StaticChainOffset = 16;
  constexpr unsigned FunctionAddressOffset = 24;
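  // For reference, the fixed part of the trampoline built below is expected
  // to look roughly like this (a sketch, not verbatim emitted text):
  //   auipc t2, 0        # t2 <- address of the trampoline itself
  //   ld    t0, 24(t2)   # load <FunctionAddressOffset>
  //   ld    t2, 16(t2)   # load <StaticChainOffset> into the static chain reg
  //   jr    t0           # jump to the nested function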
  auto GetEncoding = [&](const MCInst &MC) {
    CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
      // Loads the current PC into t2.
      GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
      // Loads the function address into t0. Note that we are using offsets
      // pc-relative to the first instruction of the trampoline.
          MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
              FunctionAddressOffset)),
      // Load the value of the static chain.
          MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
              StaticChainOffset)),
      // Jump to the function.
  // Store encoded instructions.
  // Now store the variable part of the trampoline.
  SDValue FunctionAddress = Op.getOperand(2);
  // Store the given static chain and function pointer in the trampoline buffer.
  struct OffsetValuePair {
  } OffsetValues[] = {
      {StaticChainOffset, StaticChain},
      {FunctionAddressOffset, FunctionAddress},
                    DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
    OffsetValue.Addr = Addr;
    OutChains[Idx + 4] =
  // The end of instructions of trampoline is the same as the static chain
  // address that we computed earlier.
  SDValue EndOfTrmp = OffsetValues[0].Addr;
  // Call clear cache on the trampoline instructions.
  return Op.getOperand(0);
                                 N->getOffset(), Flags);
template <class NodeTy>
                                     bool IsLocal, bool IsExternWeak) const {
  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    // An extern weak symbol may be undefined, i.e. have value 0, which may
    // not be within 2GiB of PC, so use GOT-indirect addressing to access the
    // symbol. This generates the pattern (PseudoLGA sym), which expands to
    // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
  // Using pc-relative mode for other node type.
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  return getAddr(N, DAG);
  return getAddr(N, DAG);
  return getAddr(N, DAG);
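// For example, the PC-relative (PseudoLLA) form described above is expected
// to assemble to roughly the following (a sketch; label name illustrative):
//   .Lpcrel_hi0:
//     auipc a0, %pcrel_hi(sym)
//     addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)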
  // Use PC-relative addressing to access the GOT for this TLS symbol, then
  // load the address from the GOT and add the thread pointer. This generates
  // the pattern (PseudoLA_TLS_IE sym), which expands to
  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
  // Add the thread pointer.
  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  // Prepare argument list to generate call.
  Args.push_back(Entry);
  // Setup call to __tls_get_addr.
  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
  //
  // auipc tX, %tlsdesc_hi(symbol)         // R_RISCV_TLSDESC_HI20(symbol)
  // lw    tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
  // addi  a0, tX, %tlsdesc_add_lo(label)  // R_RISCV_TLSDESC_ADD_LO12(label)
  // jalr  t0, tY                          // R_RISCV_TLSDESC_CALL(label)
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
             : getDynamicTLSAddr(N, DAG);
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
  if (LHS == LHS2 && RHS == RHS2) {
  } else if (LHS == RHS2 && RHS == LHS2) {
  MVT VT = N->getSimpleValueType(0);
  // (select c, -1, y) -> -c | y
  // (select c, y, -1) -> (c-1) | y
  // (select c, 0, y) -> (c-1) & y
  // (select c, y, 0) -> -c & y
  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    if (~TrueVal == FalseVal) {
  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
// being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
  if (Subtarget.hasShortForwardBranchOpt())
  unsigned SelOpNo = 0;
  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
    std::swap(NewConstOps[0], NewConstOps[1]);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
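  // Illustrative example of the fold above (values made up): for
  //   (and (select cond, x, 0xff00), 0xff)
  // the constant arm folds to (and 0xff00, 0xff) == 0, so the expression
  // becomes (select cond, (and x, 0xff), 0), which can then be lowered
  // without a branch as an AND with the condition mask.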
  MVT VT = Op.getSimpleValueType();
  // Lower vector SELECTs to VSELECTs by splatting the condition.
  // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
  // nodes to implement the SELECT. Performing the lowering here allows for
  // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
  // sequence or RISCVISD::SELECT_CC node (branch-based select).
  if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
    // (select c, t, 0) -> (czero_eqz t, c)
    // (select c, 0, f) -> (czero_nez f, c)
    // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
    // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
    // Try some other optimizations before falling back to generic lowering.
    // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
    // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
          TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
          FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
      bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
          IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
                                 DL, VT, LHSVal, CondV);
    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    // Unless we have the short forward branch optimization.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
      // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
      // may return a constant node and cause crash in lowerSELECT.
      return lowerSELECT(NewSel, DAG);
  // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
  // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
  // If the condition is not an integer SETCC which operates on XLenVT, we need
  // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
  // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
  // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
  // advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
    if (TrueVal - 1 == FalseVal)
    if (TrueVal + 1 == FalseVal)
  // 1 < x ? x : 1 -> 0 < x ? x : 1
      RHS == TrueV && LHS == FalseV) {
    // 0 <u x is the same as x != 0.
  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
                        LHS, RHS, TargetCC, Op.getOperand(2));
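  // A hypothetical Zicond lowering of (select c, t, f) per the patterns above
  // (register assignments illustrative):
  //   czero.eqz a3, a1, a0   # a3 = (c != 0) ? t : 0
  //   czero.nez a4, a2, a0   # a4 = (c != 0) ? 0 : f
  //   or        a0, a3, a4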
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  int XLenInBytes = Subtarget.getXLen() / 8;
  EVT VT = Op.getValueType();
  unsigned Depth = Op.getConstantOperandVal(0);
    int Offset = -(XLenInBytes * 2);
  int XLenInBytes = Subtarget.getXLen() / 8;
  EVT VT = Op.getValueType();
  unsigned Depth = Op.getConstantOperandVal(0);
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
  // Return the value of the return address register, marking it an implicit
  EVT VT = Lo.getValueType();
  // SHL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  //     Hi = Lo << (Shamt-XLEN)
  EVT VT = Lo.getValueType();
  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //     Lo = Hi >>u (Shamt-XLEN);
  // Lower splats of i1 types to SETCC. For each mask vector type, we have a
  // legal equivalently-sized i8 type, so we can use that as a go-between.
  MVT VT = Op.getSimpleValueType();
  // All-zeros or all-ones splats are handled specially.
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to VMV_V_X_VL.
  MVT VecVT = Op.getSimpleValueType();
         "Unexpected SPLAT_VECTOR_PARTS lowering");
  MVT ContainerVT = VecVT;
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
                                                int64_t ExtTrueVal) const {
  MVT VecVT = Op.getSimpleValueType();
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);
                       DAG.getUNDEF(ContainerVT), SplatZero, VL);
                       DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
                         SplatZero, DAG.getUNDEF(ContainerVT), VL);
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
// Custom-lower truncations from vectors to mask vectors by using a mask and a
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
         "Unexpected type for vector mask lowering");
  MVT VecVT = Src.getSimpleValueType();
    VL = Op.getOperand(2);
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  MVT MaskContainerVT =
    std::tie(Mask, VL) =
                          DAG.getUNDEF(ContainerVT), SplatOne, VL);
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
                      DAG.getUNDEF(ContainerVT), Mask, VL);
                     DAG.getUNDEF(MaskContainerVT), Mask, VL});
  unsigned Opc = Op.getOpcode();
  bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
  // Truncates to mask types are handled differently
    return lowerVectorMaskTruncLike(Op, DAG);
  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT SrcVT = Src.getSimpleValueType();
         "Unexpected vector truncate lowering");
  MVT ContainerVT = SrcVT;
    VL = Op.getOperand(2);
    std::tie(Mask, VL) =
  } while (SrcEltVT != DstEltVT);
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  // RVV can only widen/truncate fp to types double/half the size as the source.
  // For double rounding, the intermediate rounding should be round-to-odd.
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
                    Chain, Src, Mask, VL);
  // StrictFP operations have two result values. Their lowered result should
  // have same result count.
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                   Op.getOpcode() == ISD::VP_FP_ROUND ||
                   Op.getOpcode() == ISD::VP_FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
  MVT SrcVT = Src.getSimpleValueType();
  bool IsDirectExtend =
  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
    VL = Op.getOperand(2);
    std::tie(Mask, VL) =
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
  unsigned InterConvOpc =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
// Given a scalable vector type and an index into it, returns the type for the
// smallest subvector that the index fits in. This can be used to reduce LMUL
// for operations like vslidedown.
//
// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
static std::optional<MVT>
  const unsigned MinVLMAX = VectorBitsMin / EltSize;
  if (MaxIdx < MinVLMAX)
  else if (MaxIdx < MinVLMAX * 2)
  else if (MaxIdx < MinVLMAX * 4)
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
  MVT VecVT = Op.getSimpleValueType();
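  // A sketch of the expected sequence for inserting a scalar into element 2
  // of an e32 vector (registers and vtype are illustrative):
  //   vsetivli    zero, 3, e32, m1, tu, ma   # VL = index + 1, tail undisturbed
  //   vmv.s.x     v9, a0                     # scalar into element 0 of v9
  //   vslideup.vi v8, v9, 2                  # slide it up to the insert index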
  // FIXME: For now we just promote to an i8 vector and insert into that,
  // but this is probably not optimal.
      ValVT == MVT::bf16) {
    // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  MVT OrigContainerVT = ContainerVT;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
    const unsigned OrigIdx = IdxC->getZExtValue();
    // Do we know an upper bound on LMUL?
                                      DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
    // If we're compiling for an exact VLEN value, we can always perform
    // the insert in m1 as we can determine the register corresponding to
    // the index in the register group.
        VLEN && ContainerVT.bitsGT(M1VT)) {
      unsigned RemIdx = OrigIdx % ElemsPerVReg;
      unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
      unsigned ExtractIdx =
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
    Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
  // On RV32, i64-element vectors must be specially handled to place the
  // value at element 0, by using two vslide1down instructions in sequence on
  // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
  std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
  // Limit the active VL to two.
  // If the Idx is 0 we can insert directly into the vector.
    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
                           Vec, Vec, ValLo, I32Mask, InsertI64VL);
    // If the source vector is undef don't pass along the tail elements from
    // the previous slide1down.
                           Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
                        ValInVec, AlignedIdx);
    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  // Now that the value is in a vector, slide it into position.
  // Use tail agnostic policy if Idx is the last index of Vec.
                              Idx, Mask, InsertVL, Policy);
                        Slideup, AlignedIdx);
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
  EVT EltVT = Op.getValueType();
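  // A sketch of the expected sequence for extracting element 3 of an e32
  // integer vector (registers and vtype are illustrative):
  //   vsetivli      zero, 1, e32, m1, ta, ma
  //   vslidedown.vi v9, v8, 3
  //   vmv.x.s       a0, v9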
  // Use vfirst.m to extract the first bit.
  MVT ContainerVT = VecVT;
    unsigned WidenVecLen;
    unsigned MaxEEW = Subtarget.getELen();
           "the number of elements should be power of 2");
      WideEltVT = LargestEltVT;
    // extract element index = index / element width
    ExtractElementIdx = DAG.getNode(
    // mask bit index = index % element width
                      Vec, ExtractElementIdx);
    // Extract the bit from GPR.
  // Otherwise, promote to an i8 vector and extract from that.
      EltVT == MVT::bf16) {
    // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  // If we're compiling for an exact VLEN value and we have a known
  // constant index, we can always perform the extract in m1 (or
  // smaller) as we can determine the register corresponding to
  // the index in the register group.
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
      IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
    unsigned OrigIdx = IdxC->getZExtValue();
    unsigned RemIdx = OrigIdx % ElemsPerVReg;
    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
    unsigned ExtractIdx =
  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
  // contains our index.
  std::optional<uint64_t> MaxIdx;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
    MaxIdx = IdxC->getZExtValue();
    ContainerVT = *SmallerVT;
  // If after narrowing, the required slide is still greater than LMUL2,
  // fallback to generic expansion and go through the stack. This is done
  // for a subtle reason: extracting *all* elements out of a vector is
  // widely expected to be linear in vector size, but because vslidedown
  // is linear in LMUL, performing N extracts using vslidedown becomes
  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
  // seems to have the same problem (the store is linear in LMUL), but the
  // generic expansion *memoizes* the store, and thus for many extracts of
  // the same vector we end up with one store and a bunch of loads.
  // TODO: We don't have the same code for insert_vector_elt because we
  // have BUILD_VECTOR and handle the degenerate case there. Should we
  // consider adding an inverse BUILD_VECTOR node?
  // If the index is 0, the vector is already in the right position.
  // Use a VL of 1 to avoid processing more elements than we need.
  // Floating-point extracts are handled in TableGen.
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
         "Unexpected opcode");
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  // If this isn't a scalar, or its type is XLenVT we're done.
  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    bool IsMasked = NumOps == 7;
    // Convert the vector source to the equivalent nxvXi32 vector.
    std::tie(ScalarLo, ScalarHi) =
    // Double the VL since we halved SEW.
    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      const auto [MinVLMAX, MaxVLMAX] =
      if (AVLInt <= MinVLMAX) {
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So let the following code handle
    // Using vsetvli instruction to get actually used length which related to
    // the hardware implementation
    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
                          ScalarHi, I32Mask, I32VL);
                          ScalarLo, I32Mask, I32VL);
                          ScalarLo, I32Mask, I32VL);
                          ScalarHi, I32Mask, I32VL);
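    // For a 64-bit scalar on RV32, the net effect of the SEW=32 path above is
    // expected to be roughly (a sketch, registers illustrative):
    //   vsetvli        zero, a2, e32, m2, ta, ma   # doubled VL, halved SEW
    //   vslide1down.vx v8, v8, a0                  # shift in the low 32 bits
    //   vslide1down.vx v8, v8, a1                  # then the high 32 bits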
    // Convert back to nxvXi64.
    // Apply mask after the operation.
    // Assume Policy operand is the last operand.
    // We don't need to select maskedoff if it's undef.
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care mask policy.
  // We need to convert the scalar to a splat vector.
// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;
  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  // We don't support VF==1 with ELEN==32.
  [[maybe_unused]] unsigned MinVF =
  [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9677unsigned IntNo =
Op.getConstantOperandVal(HasChain ? 1 : 0);
9681 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9682if (!
II || !
II->hasScalarOperand())
9685unsigned SplatOp =
II->ScalarOperand + 1;
9692// The code below is partially copied from lowerVectorIntrinsicScalars. 9693// If this isn't a scalar, or its type is XLenVT we're done. 9697// Manually emit promote operation for scalar operation. 9698if (OpVT.
bitsLT(XLenVT)) {
9701 ScalarOp = DAG.
getNode(ExtOpc,
DL, XLenVT, ScalarOp);
9712EVT ValType = V.getValueType();
9713if (ValType.isVector() && ValType.isFloatingPoint()) {
9716 ValType.getVectorElementCount());
9719if (ValType.isFixedLengthVector()) {
9721 DAG, V.getSimpleValueType(), Subtarget);
9727// LMUL * VLEN should be greater than or equal to EGS * SEW 9737unsigned IntNo =
Op.getConstantOperandVal(0);
9743break;
// Don't custom lower most intrinsics. 9744case Intrinsic::riscv_tuple_insert: {
9752case Intrinsic::riscv_tuple_extract: {
9759case Intrinsic::thread_pointer: {
9763case Intrinsic::riscv_orc_b:
9764case Intrinsic::riscv_brev8:
9765case Intrinsic::riscv_sha256sig0:
9766case Intrinsic::riscv_sha256sig1:
9767case Intrinsic::riscv_sha256sum0:
9768case Intrinsic::riscv_sha256sum1:
9769case Intrinsic::riscv_sm3p0:
9770case Intrinsic::riscv_sm3p1: {
9783return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1));
9785case Intrinsic::riscv_sm4ks:
9786case Intrinsic::riscv_sm4ed: {
9790return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1),
Op.getOperand(2),
9793case Intrinsic::riscv_zip:
9794case Intrinsic::riscv_unzip: {
9797return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1));
9799case Intrinsic::riscv_mopr:
9803case Intrinsic::riscv_moprr: {
9805Op.getOperand(2),
Op.getOperand(3));
9807case Intrinsic::riscv_clmul:
9810case Intrinsic::riscv_clmulh:
9811case Intrinsic::riscv_clmulr: {
9814return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1),
Op.getOperand(2));
9816case Intrinsic::experimental_get_vector_length:
9818case Intrinsic::experimental_cttz_elts:
9820case Intrinsic::riscv_vmv_x_s: {
9824case Intrinsic::riscv_vfmv_f_s:
9827case Intrinsic::riscv_vmv_v_x:
9829Op.getOperand(3),
Op.getSimpleValueType(),
DL, DAG,
9831case Intrinsic::riscv_vfmv_v_f:
9833Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
9834case Intrinsic::riscv_vmv_s_x: {
9837if (
Scalar.getValueType().bitsLE(XLenVT)) {
9840Op.getOperand(1), Scalar,
Op.getOperand(3));
9843assert(
Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar VT!");
9845// This is an i64 value that lives in two scalar registers. We have to 9846// insert this in a convoluted way. First we build vXi64 splat containing 9847// the two values that we assemble using some bit math. Next we'll use 9848// vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 9849// to merge element 0 from our splat into the source vector. 9850// FIXME: This is probably not the best way to do this, but it is 9851// consistent with INSERT_VECTOR_ELT lowering so it is a good starting 9858// vmseq.vx mMask, vVid, 0 9859// vmerge.vvm vDest, vSrc, vVal, mMask 9860MVT VT =
Op.getSimpleValueType();
9865if (
Op.getOperand(1).isUndef())
9881case Intrinsic::riscv_vfmv_s_f:
9883Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
9884// EGS * EEW >= 128 bits 9885case Intrinsic::riscv_vaesdf_vv:
9886case Intrinsic::riscv_vaesdf_vs:
9887case Intrinsic::riscv_vaesdm_vv:
9888case Intrinsic::riscv_vaesdm_vs:
9889case Intrinsic::riscv_vaesef_vv:
9890case Intrinsic::riscv_vaesef_vs:
9891case Intrinsic::riscv_vaesem_vv:
9892case Intrinsic::riscv_vaesem_vs:
9893case Intrinsic::riscv_vaeskf1:
9894case Intrinsic::riscv_vaeskf2:
9895case Intrinsic::riscv_vaesz_vs:
9896case Intrinsic::riscv_vsm4k:
9897case Intrinsic::riscv_vsm4r_vv:
9898case Intrinsic::riscv_vsm4r_vs: {
9899if (!
isValidEGW(4,
Op.getSimpleValueType(), Subtarget) ||
9900 !
isValidEGW(4,
Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9901 !
isValidEGW(4,
Op->getOperand(2).getSimpleValueType(), Subtarget))
9905// EGS * EEW >= 256 bits 9906case Intrinsic::riscv_vsm3c:
9907case Intrinsic::riscv_vsm3me: {
9908if (!
isValidEGW(8,
Op.getSimpleValueType(), Subtarget) ||
9909 !
isValidEGW(8,
Op->getOperand(1).getSimpleValueType(), Subtarget))
9913// zvknha(SEW=32)/zvknhb(SEW=[32|64]) 9914case Intrinsic::riscv_vsha2ch:
9915case Intrinsic::riscv_vsha2cl:
9916case Intrinsic::riscv_vsha2ms: {
9917if (
Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9918 !Subtarget.hasStdExtZvknhb())
9920if (!
isValidEGW(4,
Op.getSimpleValueType(), Subtarget) ||
9921 !
isValidEGW(4,
Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9922 !
isValidEGW(4,
Op->getOperand(2).getSimpleValueType(), Subtarget))
9926case Intrinsic::riscv_sf_vc_v_x:
9927case Intrinsic::riscv_sf_vc_v_i:
9928case Intrinsic::riscv_sf_vc_v_xv:
9929case Intrinsic::riscv_sf_vc_v_iv:
9930case Intrinsic::riscv_sf_vc_v_vv:
9931case Intrinsic::riscv_sf_vc_v_fv:
9932case Intrinsic::riscv_sf_vc_v_xvv:
9933case Intrinsic::riscv_sf_vc_v_ivv:
9934case Intrinsic::riscv_sf_vc_v_vvv:
9935case Intrinsic::riscv_sf_vc_v_fvv:
9936case Intrinsic::riscv_sf_vc_v_xvw:
9937case Intrinsic::riscv_sf_vc_v_ivw:
9938case Intrinsic::riscv_sf_vc_v_vvw:
9939case Intrinsic::riscv_sf_vc_v_fvw: {
9940MVT VT =
Op.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  if (VT.isFloatingPoint()) {
  if (VT.isFixedLengthVector())
  if (VT.isFixedLengthVector())
  if (VT.isFloatingPoint())
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT VT = Op->getSimpleValueType(0);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
        Load->getMemoryVT(), Load->getMemOperand());
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
  case Intrinsic::riscv_sf_vc_v_x_se:
  case Intrinsic::riscv_sf_vc_v_i_se:
  case Intrinsic::riscv_sf_vc_v_xv_se:
  case Intrinsic::riscv_sf_vc_v_iv_se:
  case Intrinsic::riscv_sf_vc_v_vv_se:
  case Intrinsic::riscv_sf_vc_v_fv_se:
  case Intrinsic::riscv_sf_vc_v_xvv_se:
  case Intrinsic::riscv_sf_vc_v_ivv_se:
  case Intrinsic::riscv_sf_vc_v_vvv_se:
  case Intrinsic::riscv_sf_vc_v_fvv_se:
  case Intrinsic::riscv_sf_vc_v_xvw_se:
  case Intrinsic::riscv_sf_vc_v_ivw_se:
  case Intrinsic::riscv_sf_vc_v_vvw_se:
  case Intrinsic::riscv_sf_vc_v_fvw_se:
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store: {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};
    // Operands are (chain, int_id, vec*, ptr, vl)
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT VT = Op->getOperand(2).getSimpleValueType();
    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
    for (unsigned i = 0; i < NF; i++)
              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
        FixedIntrinsic->getChain(),
        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
  case Intrinsic::riscv_sf_vc_xv_se:
  case Intrinsic::riscv_sf_vc_iv_se:
  case Intrinsic::riscv_sf_vc_vv_se:
  case Intrinsic::riscv_sf_vc_fv_se:
  case Intrinsic::riscv_sf_vc_xvv_se:
  case Intrinsic::riscv_sf_vc_ivv_se:
  case Intrinsic::riscv_sf_vc_vvv_se:
  case Intrinsic::riscv_sf_vc_fvv_se:
  case Intrinsic::riscv_sf_vc_xvw_se:
  case Intrinsic::riscv_sf_vc_ivw_se:
  case Intrinsic::riscv_sf_vc_vvw_se:
  case Intrinsic::riscv_sf_vc_fvw_se:
  switch (ISDOpcode) {
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMAX:
  case ISD::VP_REDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMINIMUM:
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");
  MVT ContainerVT = VecVT;
    VL = Op.getOperand(3);
    std::tie(Mask, VL) =
  switch (Op.getOpcode()) {
  case ISD::VP_REDUCE_AND: {
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
/// Helper to lower a reduction sequence of the form:
///   scalar = reduce_op vec, scalar_start
  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  if (M1VT != InnerVT)
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
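  // For reference, an integer reduce-add with a start value is expected to
  // lower to roughly the following (illustrative registers):
  //   vmv.s.x    v9, a0        # start value into element 0
  //   vredsum.vs v9, v8, v9    # v9[0] = sum(v8[*]) + v9[0]
  //   vmv.x.s    a0, v9        # read the scalar result back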
  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
    VecEVT = Lo.getValueType();
  // TODO: The type may need to be widened rather than split. Or widened before
  MVT ContainerVT = VecVT;
                           Mask, VL, DL, DAG, Subtarget);
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
static std::tuple<unsigned, SDValue, SDValue>
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
    // Use positive zero if we can. It is cheaper to materialize.
    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
  MVT VecEltVT = Op.getSimpleValueType();
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
  MVT ContainerVT = VecVT;
  MVT ResVT = Op.getSimpleValueType();
                           VL, DL, DAG, Subtarget);
  if (Op->getFlags().hasNoNaNs())
  // Force output to NaN if any element is NaN.
      {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
       DAG.getUNDEF(Mask.getValueType()), Mask, VL});
                     DL, ResVT, NoNaNs, Res,
  unsigned Opc = Op.getOpcode();
  // TODO: The type may need to be widened rather than split. Or widened before
                     Vec, Mask, VL, DL, DAG, Subtarget);
  if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
      Op->getFlags().hasNoNaNs())
  // Check if any of the elements in Vec is NaN.
  // Check if the start value is NaN.
                     DL, ResVT, NoNaNs, Res,
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  if (OrigIdx == 0 && Vec.isUndef())
  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
    assert(OrigIdx % 8 == 0 && "Invalid index");
           "Unexpected mask vector lowering");
    // We can't slide this mask vector up indexed by its i1 elements.
    // This poses a problem when we wish to insert a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    // If the subvector vector is a fixed-length type and we don't know VLEN
    // exactly, we cannot use subregister manipulation to simplify the codegen; we
    // don't know which register of a LMUL group contains the specific subvector
    // as we only know the minimum register size. Therefore we must slide the
    // vector group up the full amount.
    MVT ContainerVT = VecVT;
                           DAG.getUNDEF(ContainerVT), SubVec,
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    // Use tail agnostic policy if we're inserting over Vec's tail.
    // If we're inserting into the lowest elements, use a tail undisturbed
    SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                         SlideupAmt, Mask, VL, Policy);
  MVT ContainerVecVT = VecVT;
  MVT ContainerSubVecVT = SubVecVT;
  // insert_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
    SubRegIdx = Decompose.first;
                         (OrigIdx % Vscale));
            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
    SubRegIdx = Decompose.first;
  bool ExactlyVecRegSized =
          .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector isn't an exact multiple of a valid register group size,
  // then the insertion must preserve the undisturbed elements of the register.
  // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
  // vector type (which resolves to a subregister copy), performing a VSLIDEUP
  // to place the subvector within the vector register, and an INSERT_SUBVECTOR
  // of that LMUL=1 type back into the larger vector (resolving to another
  // subregister operation). See below for how our VSLIDEUP works. We go via a
  // LMUL=1 type to avoid allocating a large register group to hold our
  // subvector.
  if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
    // We may get NoSubRegister if inserting at index 0 and the subvec
    // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
    if (SubRegIdx == RISCV::NoSubRegister) {
    // Use an insert_subvector that will resolve to an insert subreg.
  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = ContainerVecVT;
  // Extract a subvector equal to the nearest full vector register type. This
  // should resolve to a EXTRACT_SUBREG instruction.
  // Use tail agnostic policy if we're inserting over InterSubVT's tail.
  // If we're inserting into the lowest elements, use a tail undisturbed
  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
                       SlideupAmt, Mask, VL, Policy);
  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (ContainerVecVT.bitsGT(InterSubVT))
  // We might have bitcast from a mask type: cast back to the original type if
  // necessary.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
  MVT SubVecVT = Op.getSimpleValueType();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
    assert(OrigIdx % 8 == 0 && "Invalid index");
           "Unexpected mask vector lowering");
    // We can't slide this mask vector down, indexed by its i1 elements.
    // This poses a problem when we wish to extract a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    // TODO: We could probably improve this when extracting certain fixed
    // from fixed, where we can extract as i8 and shift the correct element
    // right to reach the desired subvector?
  // If the subvector vector is a fixed-length type and we don't know VLEN
  // exactly, we cannot use subregister manipulation to simplify the codegen; we
  // don't know which register of a LMUL group contains the specific subvector
  // as we only know the minimum register size. Therefore we must slide the
  // vector group down the full amount.
    MVT ContainerVT = VecVT;
    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
      ContainerVT = *ShrunkVT;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
  MVT ContainerSubVecVT = SubVecVT;
  // extract_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
    SubRegIdx = Decompose.first;
                         (OrigIdx % Vscale));
            VecVT, ContainerSubVecVT, OrigIdx, TRI);
    SubRegIdx = Decompose.first;
  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation. We use an extract_subvector that will
  // resolve to an extract subreg.
  // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
  // was > M1 then the index would need to be a multiple of VLMAX, and so would
  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  MVT InterSubVT = VecVT;
    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    // We use an extract_subvector that will resolve to a subreg extract.
    assert(SubRegIdx != RISCV::NoSubRegister);
  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
                   Vec, SlidedownAmt, Mask, VL);
  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  // We might have bitcast from a mask type: cast back to the original type if
  // necessary.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1. All operand and result types must be the same.
  MVT VT = N.getSimpleValueType();
  assert(Op.getSimpleValueType() == VT &&
         "Operands and result must be same type");
  unsigned NumVals = N->getNumValues();
      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
  for (unsigned I = 0; I < NumVals; I++) {
  if (TruncVals.size() > 1)
  return TruncVals.front();
10975MVT VecVT =
Op.getSimpleValueType();
10978"vector_interleave on non-scalable vector!");
10980// 1 bit element vectors need to be widened to e8 10984// If the VT is LMUL=8, we need to split and reassemble. 10989EVT SplitVT = Op0Lo.getValueType();
10992 DAG.
getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10994 DAG.
getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
11003// Concatenate the two vectors as one vector to deinterleave 11008Op.getOperand(0),
Op.getOperand(1));
// We can deinterleave through vnsrl.wi if the element type is smaller than
// ELEN.
// For the indices, use the vmv.v.x of an i8 constant to fill the largest
// possible mask vector, then extract the required subvector. Doing this
// (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
// creation to be rematerialized during register allocation to reduce
// register pressure if needed.
EvenSplat = DAG.
getBitcast(MVT::nxv64i1, EvenSplat);
11032 OddSplat = DAG.
getBitcast(MVT::nxv64i1, OddSplat);
11036// vcompress the even and odd elements into two separate vectors 11038 EvenMask, DAG.
getUNDEF(ConcatVT));
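// (Conceptually, the even mask over the concatenated vector is 1,0,1,0,... and
// the odd mask is 0,1,0,1,..., so each vcompress packs one parity class of
// elements to the front of its result.)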
11042// Extract the result half of the gather for even and odd 11054MVT VecVT =
Op.getSimpleValueType();
11057"vector_interleave on non-scalable vector!");
11059// i1 vectors need to be widened to i8 11066// If the VT is LMUL=8, we need to split and reassemble. 11070EVT SplitVT = Op0Lo.getValueType();
11073 DAG.
getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11075 DAG.
getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
// If the element type is smaller than ELEN, then we can interleave with
// vwaddu.vv and vwmaccu.vx.
// Otherwise, fall back to using vrgatherei16.vv.
Op.getOperand(0),
Op.getOperand(1));
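// For the widening path mentioned above, the interleave can be formed
// arithmetically (illustrative sketch): Wide = vwaddu.vv(Even, Odd) followed
// by vwmaccu.vx(Wide, (2^SEW)-1, Odd) yields Even + Odd * 2^SEW, which, when
// reinterpreted at the original (narrower) SEW, is exactly the interleaved
// vector.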
// 0 1 2 3 4 5 6 7 ...
// 1 1 1 1 1 1 1 1 ...
// 1 0 1 0 1 0 1 0 ...
// Build up the index vector for interleaving the concatenated vector:
// 0 0 1 1 2 2 3 3 ...
// 0 n 1 n+1 2 n+2 3 n+3 ...
// Then perform the interleave:
// v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
// Extract the two halves from the interleaved result.
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
MVT VT =
Op.getSimpleValueType();
11150uint64_t StepValImm =
Op.getConstantOperandVal(0);
11151if (StepValImm != 1) {
11160 VL, VT,
DL, DAG, Subtarget);
11167// Implement vector_reverse using vrgather.vv with indices determined by 11168// subtracting the id of each element from (VLMAX-1). This will convert 11169// the indices like so: 11170// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). 11171// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 11175MVT VecVT =
Op.getSimpleValueType();
11184MVT ContainerVT = VecVT;
11194// On some uarchs vrgather.vv will read from every input register for each 11195// output register, regardless of the indices. However to reverse a vector 11196// each output register only needs to read from one register. So decompose it 11197// into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of 11200// vsetvli a1, zero, e64, m4, ta, ma 11201// vrgatherei16.vv v12, v8, v16 11203// vsetvli a1, zero, e64, m1, ta, ma 11204// vrgather.vv v15, v8, v16 11205// vrgather.vv v14, v9, v16 11206// vrgather.vv v13, v10, v16 11207// vrgather.vv v12, v11, v16 11215// Fixed length vectors might not fit exactly into their container, and so 11216// leave a gap in the front of the vector after being reversed. Slide this 11219// x x x x 3 2 1 0 <- v4i16 @ vlen=128 11220// 0 1 2 3 x x x x <- reverse 11221// x x x x 0 1 2 3 <- vslidedown.vx 11246// If this is SEW=8 and VLMAX is potentially more than 256, we need 11247// to use vrgatherei16.vv. 11248if (MaxVLMAX > 256 && EltSize == 8) {
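// (An i8 index can only address 256 distinct source elements, which is why
// SEW=8 gathers with a potential VLMAX above 256 must switch to 16-bit
// indices via vrgatherei16.vv.)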
// If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
// Reverse each half, then reassemble them in reverse order.
// NOTE: It's also possible that after splitting, VLMAX no longer
// requires vrgatherei16.vv.
// Reassemble the low and high pieces reversed.
// FIXME: This is a CONCAT_VECTORS.
// Just promote the int type to i16 which will double the LMUL.
// At LMUL > 1, do the index computation in 16 bits to reduce register
// pressure.
assert(isUInt<16>(MaxVLMAX - 1));
// Largest VLMAX is 65536 @ zvl65536b 11282// Calculate VLMAX-1 for the desired SEW. 11288// Splat VLMAX-1 taking care to handle SEW==64 on RV32. 11303 DAG.
getUNDEF(ContainerVT), Mask, VL);
11315MVT VecVT =
Op.getSimpleValueType();
11319 int64_t ImmValue = cast<ConstantSDNode>(
Op.getOperand(2))->getSExtValue();
11321if (ImmValue >= 0) {
// The operand is a TargetConstant, we need to rebuild it as a regular
// constant.
// The operand is a TargetConstant, we need to rebuild it as a regular
// constant rather than negating the original operand.
DownOffset, TrueMask, UpOffset);
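// Conceptually (illustrative), splice(V1, V2, Imm) becomes a vslidedown of V1
// by Imm followed by a vslideup of V2 by VLMAX-Imm, e.g.
//   splice(v1, v2, 2) -> vslideup(vslidedown(v1, 2), v2, VLMAX-2)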
11338returngetVSlideup(DAG, Subtarget,
DL, VecVT, SlideDown, V2, UpOffset,
11344RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(
SDValueOp,
11347auto *
Load = cast<LoadSDNode>(
Op);
11350Load->getMemoryVT(),
11351 *
Load->getMemOperand()) &&
11352"Expecting a correctly-aligned load");
11354MVT VT =
Op.getSimpleValueType();
11358// If we know the exact VLEN and our fixed length vector completely fills 11359// the container, use a whole register load instead. 11360constauto [MinVLMAX, MaxVLMAX] =
11363getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
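// e.g. (illustrative) if VLEN is known to be exactly 128, a v4i32 fixed-length
// vector fills a whole LMUL=1 register, so an ordinary load of the container
// type is enough and no VL bookkeeping is required.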
11377 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle,
DL, XLenVT);
11380 Ops.push_back(DAG.
getUNDEF(ContainerVT));
11381 Ops.push_back(
Load->getBasePtr());
11386Load->getMemoryVT(),
Load->getMemOperand());
11393RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(
SDValueOp,
11396auto *
Store = cast<StoreSDNode>(
Op);
11399Store->getMemoryVT(),
11400 *
Store->getMemOperand()) &&
11401"Expecting a correctly-aligned store");
// If the size is less than a byte, we need to pad with zeros to make a byte.
// If we know the exact VLEN and our fixed length vector completely fills
// the container, use a whole register store instead.
const auto [MinVLMAX, MaxVLMAX] =
11425getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11436 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse,
DL, XLenVT);
11439 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11440Store->getMemoryVT(),
Store->getMemOperand());
11446MVT VT =
Op.getSimpleValueType();
11448constauto *MemSD = cast<MemSDNode>(
Op);
11449EVT MemVT = MemSD->getMemoryVT();
11451SDValue Chain = MemSD->getChain();
11455bool IsExpandingLoad =
false;
11456if (
constauto *VPLoad = dyn_cast<VPLoadSDNode>(
Op)) {
11457Mask = VPLoad->getMask();
11459 VL = VPLoad->getVectorLength();
11461constauto *MLoad = cast<MaskedLoadSDNode>(
Op);
11462Mask = MLoad->getMask();
11463 PassThru = MLoad->getPassThru();
11464 IsExpandingLoad = MLoad->isExpandingLoad();
11471MVT ContainerVT = VT;
11485if (!IsUnmasked && IsExpandingLoad) {
11492unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11493 : Intrinsic::riscv_vle_mask;
11495if (IntID == Intrinsic::riscv_vle)
11496 Ops.push_back(DAG.
getUNDEF(ContainerVT));
11498 Ops.push_back(PassThru);
11499 Ops.push_back(BasePtr);
11500if (IntID == Intrinsic::riscv_vle_mask)
11501 Ops.push_back(Mask);
11503if (IntID == Intrinsic::riscv_vle_mask)
11510 Chain =
Result.getValue(1);
11512MVT IndexVT = ContainerVT;
11517bool UseVRGATHEREI16 =
false;
// If the index vector is an i8 vector and the element count exceeds 256, we
// should change the element type of the index vector to i16 to avoid
// overflow.
// FIXME: We need to do vector splitting manually for LMUL=8 cases.
UseVRGATHEREI16 =
true;
11531 DAG.
getUNDEF(IndexVT), Mask, ExpandingVL);
11535DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
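// (For an expanding load, the active elements are loaded contiguously and then
// scattered to their expanded positions: viota.m over the mask yields, for
// each active lane, the index of its source element, and a masked vrgather -
// or vrgatherei16 for large element counts - places it. Illustrative
// description of the sequence built above.)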
11548constauto *MemSD = cast<MemSDNode>(
Op);
11549EVT MemVT = MemSD->getMemoryVT();
11551SDValue Chain = MemSD->getChain();
11555bool IsCompressingStore =
false;
11556if (
constauto *VPStore = dyn_cast<VPStoreSDNode>(
Op)) {
11557 Val = VPStore->getValue();
11558Mask = VPStore->getMask();
11559 VL = VPStore->getVectorLength();
11561constauto *MStore = cast<MaskedStoreSDNode>(
Op);
11562 Val = MStore->getValue();
11563Mask = MStore->getMask();
11564 IsCompressingStore = MStore->isCompressingStore();
11573MVT ContainerVT = VT;
11578if (!IsUnmasked || IsCompressingStore) {
11587if (IsCompressingStore) {
11590 DAG.
getUNDEF(ContainerVT), Val, Mask, VL);
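// (A compressing store first packs the active elements to the front of the
// register group with the vcompress above, and then stores that contiguous
// prefix; illustrative description of the sequence being built here.)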
11597 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11599 Ops.push_back(Val);
11600 Ops.push_back(BasePtr);
11602 Ops.push_back(Mask);
11606 DAG.
getVTList(MVT::Other), Ops, MemVT, MMO);
11618MVT ContainerVT = VT;
11631 Passthru, Val, Mask, VL);
11640RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(
SDValueOp,
11642MVT InVT =
Op.getOperand(0).getSimpleValueType();
11645MVT VT =
Op.getSimpleValueType();
11659 {Op1, Op2,
Op.getOperand(2), DAG.
getUNDEF(MaskVT), Mask, VL});
11666unsigned Opc =
Op.getOpcode();
11673MVT VT =
Op.getSimpleValueType();
// RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE.
// Expand strict_fsetccs(x, oeq) to
// (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
// Tmp1 and Tmp2 might be the same node.
// Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
MVT ContainerInVT = InVT;
// VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
// only active when both input elements are ordered.
{Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11729 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11733// Use Mask as the passthru operand to let the result be 0 if either of the 11734// inputs is unordered. 11737 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11742 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11752// Lower vector ABS to smax(X, sub(0, X)). 11755MVT VT =
Op.getSimpleValueType();
11759"Unexpected type for ISD::ABS");
11761MVT ContainerVT = VT;
11768if (
Op->getOpcode() == ISD::VP_ABS) {
11773 VL =
Op->getOperand(2);
11781 DAG.
getUNDEF(ContainerVT), Mask, VL);
11783 DAG.
getUNDEF(ContainerVT), Mask, VL);
11790SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11793MVT VT =
Op.getSimpleValueType();
11797"Can only handle COPYSIGN with matching types.");
11806 Sign, DAG.
getUNDEF(ContainerVT), Mask, VL);
11811SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11813MVT VT =
Op.getSimpleValueType();
11830 Op2, DAG.
getUNDEF(ContainerVT), VL);
11841MVT VT =
Op.getSimpleValueType();
11844// Create list of operands by converting existing ones to scalable types. 11846for (
constSDValue &V :
Op->op_values()) {
11847assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
11849// Pass through non-vector operands. 11850if (!
V.getValueType().isVector()) {
11855// "cast" fixed length vector to a scalable vector. 11856assert(useRVVForFixedLengthVectorVT(
V.getSimpleValueType()) &&
11857"Only fixed length vectors are supported!");
11869// StrictFP operations have two result values. Their lowered result should 11870// have same result count. 11871if (
Op->isStrictFPOpcode()) {
11880 DAG.
getNode(NewOpc,
DL, ContainerVT, Ops,
Op->getFlags());
11884// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 11885// * Operands of each node are assumed to be in the same order. 11886// * The EVL operand is promoted from i32 to i64 on RV64. 11887// * Fixed-length vectors are converted to their scalable-vector container 11894MVT VT =
Op.getSimpleValueType();
11897MVT ContainerVT = VT;
11903assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
11904// Add dummy passthru value before the mask. Or if there isn't a mask, 11906if (HasPassthruOp) {
11909if (*MaskIdx == OpIdx.index())
11913if (
Op.getOpcode() == ISD::VP_MERGE) {
11914// For VP_MERGE, copy the false operand instead of an undef value. 11917assert(
Op.getOpcode() == ISD::VP_SELECT);
11918// For VP_SELECT, add an undef value. 11923// VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL. 11928// Pass through operands which aren't fixed-length vectors. 11929if (!
V.getValueType().isFixedLengthVector()) {
11933// "cast" fixed length vector to a scalable vector. 11934MVT OpVT =
V.getSimpleValueType();
11936assert(useRVVForFixedLengthVectorVT(OpVT) &&
11937"Only fixed length vectors are supported!");
11942return DAG.
getNode(RISCVISDOpc,
DL, VT, Ops,
Op->getFlags());
11952MVT VT =
Op.getSimpleValueType();
11955// NOTE: Mask is dropped. 11958MVT ContainerVT = VT;
11968 DAG.
getUNDEF(ContainerVT), Zero, VL);
11971Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1,
DL, XLenVT);
11973 DAG.
getUNDEF(ContainerVT), SplatValue, VL);
11976 ZeroSplat, DAG.
getUNDEF(ContainerVT), VL);
11985MVT VT =
Op.getSimpleValueType();
11989ISD::CondCode Condition = cast<CondCodeSDNode>(
Op.getOperand(2))->get();
11990// NOTE: Mask is dropped. 11993MVT ContainerVT = VT;
12003switch (Condition) {
12010// X == Y --> ~(X^Y) 12018// X >s Y --> X == 0 & Y == 1 --> ~X & Y 12019// X <u Y --> X == 0 & Y == 1 --> ~X & Y 12027// X <s Y --> X == 1 & Y == 0 --> ~Y & X 12028// X >u Y --> X == 1 & Y == 0 --> ~Y & X 12036// X >=s Y --> X == 0 | Y == 1 --> ~X | Y 12037// X <=u Y --> X == 0 | Y == 1 --> ~X | Y 12045// X <=s Y --> X == 1 | Y == 0 --> ~Y | X 12046// X >=u Y --> X == 1 | Y == 0 --> ~Y | X 12061// Lower Floating-Point/Integer Type-Convert VP SDNodes 12071MVT DstVT =
Op.getSimpleValueType();
12072MVT SrcVT = Src.getSimpleValueType();
12085if (DstEltSize >= SrcEltSize) {
// Single-width and widening conversion. 12093// Do we need to do any pre-widening before converting? 12094if (SrcEltSize == 1) {
12105 ZeroSplat, DAG.
getUNDEF(IntVT), VL);
12106 }
elseif (DstEltSize > (2 * SrcEltSize)) {
12107// Widen before converting. 12110 Src = DAG.
getNode(RISCVISDExtOpc,
DL, IntVT, Src, Mask, VL);
12116"Wrong input/output vector types");
12118// Convert f16 to f32 then convert f32 to i64. 12119if (DstEltSize > (2 * SrcEltSize)) {
12129 }
else {
// Narrowing + Conversion 12132// First do a narrowing convert to an FP type half the size, then round 12133// the FP type to a small FP type if needed. 12135MVT InterimFVT = DstVT;
12136if (SrcEltSize > (2 * DstEltSize)) {
12137assert(SrcEltSize == (4 * DstEltSize) &&
"Unexpected types!");
12144if (InterimFVT != DstVT) {
12150"Wrong input/output vector types");
12151// First do a narrowing conversion to an integer half the size, then 12152// truncate if needed. 12154if (DstEltSize == 1) {
12155// First convert to the same size integer, then convert to mask using 12157assert(SrcEltSize >= 16 &&
"Unexpected FP type!");
12162// Compare the integer result to 0. The integer should be 0 or 1/-1, 12163// otherwise the conversion was undefined. 12167 DAG.
getUNDEF(InterimIVT), SplatZero, VL);
12177while (InterimIVT != DstVT) {
12189MVT VT =
Op.getSimpleValueType();
12198MVT VT =
Op.getSimpleValueType();
12206// Use default legalization if a vector of EVL type would be legal. 12212MVT ContainerVT = VT;
12220// Promote to a vector of i8. 12223// Promote TrueVal and FalseVal using VLMax. 12224// FIXME: Is there a better way to do this? 12233 SplatZero, DAG.
getUNDEF(PromotedVT), VL);
12234// Any element past VL uses FalseVal, so use VLMax 12236 SplatOne, SplatZero, DAG.
getUNDEF(PromotedVT), VLMax);
12238// VP_MERGE the two promoted values. 12240 TrueVal, FalseVal, FalseVal, VL);
12242// Convert back to mask. 12255RISCVTargetLowering::lowerVPSpliceExperimental(
SDValueOp,
12267MVT VT =
Op.getSimpleValueType();
12268MVT ContainerVT = VT;
12281// Expand input operands 12289 SplatZeroOp1, DAG.
getUNDEF(ContainerVT), EVL1);
12298 SplatZeroOp2, DAG.
getUNDEF(ContainerVT), EVL2);
12301 int64_t ImmValue = cast<ConstantSDNode>(
Offset)->getSExtValue();
12303if (ImmValue >= 0) {
12304// The operand is a TargetConstant, we need to rebuild it as a regular 12309// The operand is a TargetConstant, we need to rebuild it as a regular 12310// constant rather than negating the original operand. 12317 Op1, DownOffset, Mask, UpOffset);
12322// Truncate Result back to a mask vector (Result has same EVL as Op2) 12325 {Result, DAG.getConstant(0, DL, ContainerVT),
12326 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12341MVT VT =
Op.getSimpleValueType();
12343MVT ContainerVT = VT;
12359RISCVTargetLowering::lowerVPReverseExperimental(
SDValueOp,
12362MVT VT =
Op.getSimpleValueType();
12369MVT ContainerVT = VT;
12377MVT GatherVT = ContainerVT;
12379// Check if we are working with mask vectors 12384// Expand input operand 12392 SplatZero, DAG.
getUNDEF(IndicesVT), EVL);
12402// If this is SEW=8 and VLMAX is unknown or more than 256, we need 12403// to use vrgatherei16.vv. 12404// TODO: It's also possible to use vrgatherei16.vv for other types to 12405// decrease register width for the index calculation. 12406// NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 12407if (MaxVLMAX > 256 && EltSize == 8) {
12408// If this is LMUL=8, we have to split before using vrgatherei16.vv. 12409// Split the vector in half and reverse each half using a full register 12411// Swap the halves and concatenate them. 12412// Slide the concatenated result by (VLMax - VL). 12420// Reassemble the low and high pieces reversed. 12421// NOTE: this Result is unmasked (because we do not need masks for 12422// shuffles). If in the future this has to change, we can use a SELECT_VL 12423// between Result and UNDEF using the mask originally passed to VP_REVERSE 12427// Slide off any elements from past EVL that were reversed into the low 12435 DAG.
getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12438// Truncate Result back to a mask vector 12451// Just promote the int type to i16 which will double the LMUL. 12460 DAG.
getUNDEF(IndicesVT), VecLen, EVL);
12462 DAG.
getUNDEF(IndicesVT), Mask, EVL);
12464 DAG.
getUNDEF(GatherVT), Mask, EVL);
12467// Truncate Result back to a mask vector 12481MVT VT =
Op.getSimpleValueType();
12483return lowerVPOp(
Op, DAG);
12485// It is safe to drop mask parameter as masked-off elements are undef. 12490MVT ContainerVT = VT;
12509MVT VT =
Op.getSimpleValueType();
12510MVT ContainerVT = VT;
12516auto *VPNode = cast<VPStridedLoadSDNode>(
Op);
12517// Check if the mask is known to be all ones 12522 : Intrinsic::riscv_vlse_mask,
12525 DAG.
getUNDEF(ContainerVT), VPNode->getBasePtr(),
12526 VPNode->getStride()};
12534 Ops.
push_back(VPNode->getVectorLength());
12542 VPNode->getMemoryVT(), VPNode->getMemOperand());
12556auto *VPNode = cast<VPStridedStoreSDNode>(
Op);
12557SDValue StoreVal = VPNode->getValue();
12559MVT ContainerVT = VT;
12565// Check if the mask is known to be all ones 12570 : Intrinsic::riscv_vsse_mask,
12573 VPNode->getBasePtr(), VPNode->getStride()};
12581 Ops.
push_back(VPNode->getVectorLength());
12584 Ops, VPNode->getMemoryVT(),
12585 VPNode->getMemOperand());
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
MVT VT =
Op.getSimpleValueType();
12599constauto *MemSD = cast<MemSDNode>(
Op.getNode());
12600EVT MemVT = MemSD->getMemoryVT();
12602SDValue Chain = MemSD->getChain();
12608if (
auto *VPGN = dyn_cast<VPGatherSDNode>(
Op.getNode())) {
12609Index = VPGN->getIndex();
12610Mask = VPGN->getMask();
12612 VL = VPGN->getVectorLength();
12613// VP doesn't support extending loads. 12616// Else it must be a MGATHER. 12617auto *MGN = cast<MaskedGatherSDNode>(
Op.getNode());
12618Index = MGN->getIndex();
12619Mask = MGN->getMask();
12620 PassThru = MGN->getPassThru();
12624MVT IndexVT =
Index.getSimpleValueType();
12629assert(
BasePtr.getSimpleValueType() == XLenVT &&
"Unexpected pointer type");
12630// Targets have to explicitly opt-in for extending vector loads. 12632"Unexpected extending MGATHER/VP_GATHER");
12634// If the mask is known to be all ones, optimize to an unmasked intrinsic; 12635// the selection of the masked intrinsics doesn't do this for us. 12638MVT ContainerVT = VT;
12662 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12679 Chain =
Result.getValue(1);
// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then
// be matched to a RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
const auto *MemSD = cast<MemSDNode>(
Op.getNode());
12697EVT MemVT = MemSD->getMemoryVT();
12699SDValue Chain = MemSD->getChain();
12702 [[maybe_unused]]
bool IsTruncatingStore =
false;
12705if (
auto *VPSN = dyn_cast<VPScatterSDNode>(
Op.getNode())) {
12706Index = VPSN->getIndex();
12707Mask = VPSN->getMask();
12708 Val = VPSN->getValue();
12709 VL = VPSN->getVectorLength();
12710// VP doesn't support truncating stores. 12711 IsTruncatingStore =
false;
12713// Else it must be a MSCATTER. 12714auto *MSN = cast<MaskedScatterSDNode>(
Op.getNode());
12715Index = MSN->getIndex();
12716Mask = MSN->getMask();
12717 Val = MSN->getValue();
12718 IsTruncatingStore = MSN->isTruncatingStore();
12722MVT IndexVT =
Index.getSimpleValueType();
12727assert(
BasePtr.getSimpleValueType() == XLenVT &&
"Unexpected pointer type");
12728// Targets have to explicitly opt-in for extending vector loads and 12729// truncating vector stores. 12730assert(!IsTruncatingStore &&
"Unexpected truncating MSCATTER/VP_SCATTER");
12732// If the mask is known to be all ones, optimize to an unmasked intrinsic; 12733// the selection of the masked intrinsics doesn't do this for us. 12736MVT ContainerVT = VT;
12760 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12770 DAG.
getVTList(MVT::Other), Ops, MemVT, MMO);
12782// Encoding used for rounding mode in RISC-V differs from that used in 12783// FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a 12784// table, which consists of a sequence of 4-bit fields, each representing 12785// corresponding FLT_ROUNDS mode. 12786staticconstint Table =
12811// Encoding used for rounding mode in RISC-V differs from that used in 12812// FLT_ROUNDS. To convert it the C rounding mode is used as an index in 12813// a table, which consists of a sequence of 4-bit fields, each representing 12814// corresponding RISC-V mode. 12815staticconstunsigned Table =
12838bool isRISCV64 = Subtarget.
is64Bit();
12845// Returns the opcode of the target-specific SDNode that implements the 32-bit 12846// form of the given Opcode. 12870// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 12871// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would 12872// otherwise be promoted to i64, making it difficult to select the 12873// SLLW/DIVUW/.../*W later one because the fact the operation was originally of 12874// type i8/i16/i32 is lost. 12882// ReplaceNodeResults requires we maintain the same type for the return value. 12886// Converts the given 32-bit operation to a i64 operation with signed extension 12887// semantic to reduce the signed extension instructions. 12902switch (
N->getOpcode()) {
12904llvm_unreachable(
"Don't know how to custom type legalize this operation!");
12910"Unexpected custom legalisation");
12911bool IsStrict =
N->isStrictFPOpcode();
12914SDValue Op0 = IsStrict ?
N->getOperand(1) :
N->getOperand(0);
// In absence of Zfh, promote f16 to f32, then convert.
Opc,
DL, VTs, Chain, Op0,
// For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
// If the FP type needs to be softened, emit a library call using the 'si'
// version. If we left it to default legalization we'd end up with 'di'. If
// the FP type doesn't need to be softened just let generic type
// legalization promote the result type.
std::tie(Result, Chain) =
12967makeLibCall(DAG, LC,
N->getValueType(0), Op0, CallOptions,
DL, Chain);
// In absence of Zfh, promote f16 to f32, then convert.
// If the FP type needs to be softened, emit a library call to lround. We'll
// need to truncate the result. We assume any value that doesn't fit in i32
// is allowed to return an unspecified value.
Op0.
getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
13006assert(!Subtarget.
is64Bit() &&
"READCYCLECOUNTER/READSTEADYCOUNTER only " 13007"has custom type legalization on riscv32");
13020N->getOperand(0), LoCounter, HiCounter);
13031// Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 13032// sext_inreg we emit for ADD/SUB/MUL/SLLI. 13044unsignedSize =
N->getSimpleValueType(0).getSizeInBits();
13045unsigned XLen = Subtarget.
getXLen();
13046// This multiply needs to be expanded, try to use MULHSU+MUL if possible. 13048assert(
Size == (XLen * 2) &&
"Unexpected custom legalisation");
13055// We need exactly one side to be unsigned. 13056if (LHSIsU == RHSIsU)
13071// The other operand should be signed, but still prefer MULH when 13073if (RHSIsU && LHSIsS && !RHSIsS)
13075elseif (LHSIsU && RHSIsS && !LHSIsS)
13085"Unexpected custom legalisation");
13092"Unexpected custom legalisation");
13094// If we can use a BSET instruction, allow default promotion to apply. 13095if (
N->getOpcode() ==
ISD::SHL && Subtarget.hasStdExtZbs() &&
13102// Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 13103// similar to customLegalizeToWOpWithSExt, but we must zero_extend the 13121"Unexpected custom legalisation");
13122assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13123 Subtarget.hasVendorXTHeadBb()) &&
13124"Unexpected custom legalization");
13125if (!isa<ConstantSDNode>(
N->getOperand(1)) &&
13126 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13135"Unexpected custom legalisation");
13149MVT VT =
N->getSimpleValueType(0);
13150assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13151 Subtarget.
is64Bit() && Subtarget.hasStdExtM() &&
13152"Unexpected custom legalisation");
13153// Don't promote division/remainder by constant since we should expand those 13154// to multiply by magic constant. 13160// If the input is i32, use ANY_EXTEND since the W instructions don't read 13161// the upper 32 bits. For other types we need to sign or zero extend 13162// based on the opcode. 13173"Unexpected custom legalisation");
13175// If the RHS is a constant, we can simplify ConditionRHS below. Otherwise 13176// use the default legalization. 13177if (!isa<ConstantSDNode>(
N->getOperand(1)))
13188// For an addition, the result should be less than one of the operands (LHS) 13189// if and only if the other operand (RHS) is negative, otherwise there will 13191// For a subtraction, the result should be less than one of the operands 13192// (LHS) if and only if the other operand (RHS) is (non-zero) positive, 13193// otherwise there will be overflow. 13194EVT OType =
N->getValueType(1);
13207"Unexpected custom legalisation");
13209// Create an ADDW or SUBW. 13219// Special case uaddo X, 1 overflowed if the addition result is 0. 13220// The general case (X + C) < C is not necessarily beneficial. Although we 13221// reduce the live range of X, we may introduce the materialization of 13222// constant C, especially when the setcc result is used by branch. We have 13223// no compare with constant and branch instructions. 13224 Overflow = DAG.
getSetCC(
DL,
N->getValueType(1), Res,
13227// Special case uaddo X, -1 overflowed if X != 0. 13228 Overflow = DAG.
getSetCC(
DL,
N->getValueType(1),
N->getOperand(0),
13231// Sign extend the LHS and perform an unsigned compare with the ADDW 13232// result. Since the inputs are sign extended from i32, this is equivalent 13233// to comparing the lower 32 bits. 13246 !Subtarget.hasStdExtZbb() &&
"Unexpected custom legalisation");
13247// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 13248// promotion for UADDO/USUBO. 13255"Unexpected custom legalisation");
13261"Unexpected custom legalisation");
13263if (Subtarget.hasStdExtZbb()) {
// Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
// This allows us to remember that the result is sign extended. Expanding
// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
// Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
// Freeze the source so we can increase its use count.
// Copy sign bit to all bits using the sraiw pattern.
// NOTE: The result is only required to be anyextended, but sext is
// consistent with type legalization of sub.
EVT VT =
N->getValueType(0);
13302if (VT == MVT::i16 &&
13304 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13307 }
elseif (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.
is64Bit() &&
13312 }
elseif (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.
is64Bit() &&
13315 DAG.
getVTList(MVT::i32, MVT::i32), Op0);
13321// Custom-legalize bitcasts from fixed-length vector types to illegal 13322// scalar types in order to improve codegen. Bitcast the vector to a 13323// one-element vector type whose element type is the same as the result 13324// type, and extract the first element. 13336MVT VT =
N->getSimpleValueType(0);
13338assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.
is64Bit())) &&
13339"Unexpected custom legalisation");
13342"Unexpected extension");
13345// ReplaceNodeResults requires we maintain the same type for the return 13351// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 13352// type is illegal (currently only vXi64 RV32). 13353// With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 13354// transferred to the destination register. We issue two of these from the 13355// upper- and lower- halves of the SEW-bit vector element, slid down to the 13360// The vector type hasn't been legalized yet so we can't issue target 13361// specific nodes if it needs legalization. 13362// FIXME: We would manually legalize if it's important. 13370"Unexpected EXTRACT_VECTOR_ELT legalization");
13372// If this is a fixed vector, we need to convert it to a scalable vector. 13373MVT ContainerVT = VecVT;
13381// Use a VL of 1 to avoid processing more elements than we need. 13384// Unless the index is known to be 0, we must slide the vector down to get 13385// the desired element into index 0. 13391// Extract the lower XLEN bits of the correct vector element. 13394// To extract the upper XLEN bits of the vector element, shift the first 13395// element right by 32 bits and re-extract the lower XLEN bits. 13401 DAG.
getUNDEF(ContainerVT), Mask, VL);
13409unsigned IntNo =
N->getConstantOperandVal(0);
13413"Don't know how to custom type legalize this intrinsic!");
13414case Intrinsic::experimental_get_vector_length: {
13419case Intrinsic::experimental_cttz_elts: {
13425case Intrinsic::riscv_orc_b:
13426case Intrinsic::riscv_brev8:
13427case Intrinsic::riscv_sha256sig0:
13428case Intrinsic::riscv_sha256sig1:
13429case Intrinsic::riscv_sha256sum0:
13430case Intrinsic::riscv_sha256sum1:
13431case Intrinsic::riscv_sm3p0:
13432case Intrinsic::riscv_sm3p1: {
13433if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13453case Intrinsic::riscv_sm4ks:
13454case Intrinsic::riscv_sm4ed: {
13462 DAG.
getNode(Opc,
DL, MVT::i64, NewOp0, NewOp1,
N->getOperand(3));
13466case Intrinsic::riscv_mopr: {
13467if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13477case Intrinsic::riscv_moprr: {
13478if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13490case Intrinsic::riscv_clmul: {
13491if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13502case Intrinsic::riscv_clmulh:
13503case Intrinsic::riscv_clmulr: {
13504if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13507// Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros 13508// to the full 128-bit clmul result of multiplying two xlen values. 13509// Perform clmulr or clmulh on the shifted values. Finally, extract the 13512// The alternative is to mask the inputs to 32 bits and use clmul, but 13513// that requires two shifts to mask each input without zext.w. 13514// FIXME: If the inputs are known zero extended or could be freely 13515// zero extended, the mask form would be better. 13532case Intrinsic::riscv_vmv_x_s: {
13533EVT VT =
N->getValueType(0);
13536// Simple case just extract using vmv.x.s and truncate. 13544"Unexpected custom legalization");
13546// We need to do the move in two steps. 13550// First extract the lower XLEN bits of the element. 13553// To extract the upper XLEN bits of the vector element, shift the first 13554// element right by 32 bits and re-extract the lower XLEN bits. 13582case ISD::VP_REDUCE_ADD:
13583case ISD::VP_REDUCE_AND:
13584case ISD::VP_REDUCE_OR:
13585case ISD::VP_REDUCE_XOR:
13586case ISD::VP_REDUCE_SMAX:
13587case ISD::VP_REDUCE_UMAX:
13588case ISD::VP_REDUCE_SMIN:
13589case ISD::VP_REDUCE_UMIN:
13603/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP 13604/// which corresponds to it. 13626// Note: This is the associative form of the generic reduction opcode. 13631/// Perform two related transforms whose purpose is to incrementally recognize 13632/// an explode_vector followed by scalar reduction as a vector reduction node. 13633/// This exists to recover from a deficiency in SLP which can't handle 13634/// forests with multiple roots sharing common nodes. In some cases, one 13635/// of the trees will be vectorized, and the other will remain (unprofitably) 13641// This transforms need to run before all integer types have been legalized 13642// to i64 (so that the vector element type matches the add type), and while 13643// it's safe to introduce odd sized vector types. 13647// Without V, this transform isn't useful. We could form the (illegal) 13648// operations and let them be scalarized again, but there's really no point. 13653constEVT VT =
N->getValueType(0);
13654constunsigned Opc =
N->getOpcode();
13656// For FADD, we only handle the case with reassociation allowed. We 13657// could handle strict reduction order, but at the moment, there's no 13658// known reason to, and the complexity isn't worth it. 13659// TODO: Handle fminnum and fmaxnum here 13661 (Opc !=
ISD::FADD || !
N->getFlags().hasAllowReassociation()))
13666"Inconsistent mappings");
13677 !isa<ConstantSDNode>(
RHS.getOperand(1)))
13680uint64_t RHSIdx = cast<ConstantSDNode>(
RHS.getOperand(1))->getLimitedValue();
13690// match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to 13691// reduce_op (extract_subvector [2 x VT] from V). This will form the 13692// root of our reduction tree. TODO: We could extend this to any two 13693// adjacent aligned constant indices if desired. 13695LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(
LHS.getOperand(1))) {
13697 cast<ConstantSDNode>(
LHS.getOperand(1))->getLimitedValue();
13698if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13702return DAG.
getNode(ReduceOpc,
DL, VT, Vec,
N->getFlags());
13706// Match (binop (reduce (extract_subvector V, 0), 13707// (extract_vector_elt V, sizeof(SubVec)))) 13708// into a reduction of one more element from the original vector V. 13709if (
LHS.getOpcode() != ReduceOpc)
13717// For illegal types (e.g. 3xi32), most will be combined again into a 13718// wider (hopefully legal) type. If this is a terminal state, we are 13719// relying on type legalization here to produce something reasonable 13720// and this lowering quality could probably be improved. (TODO) 13724return DAG.
getNode(ReduceOpc,
DL, VT, Vec,
13732// Try to fold (<bop> x, (reduction.<bop> vec, start)) 13735auto BinOpToRVVReduce = [](
unsigned Opc) {
13764auto IsReduction = [&BinOpToRVVReduce](
SDValue V,
unsigned Opc) {
13767 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13770unsigned Opc =
N->getOpcode();
13772if (IsReduction(
N->getOperand(0), Opc))
13774elseif (IsReduction(
N->getOperand(1), Opc))
13779// Skip if FADD disallows reassociation but the combiner needs. 13780if (Opc ==
ISD::FADD && !
N->getFlags().hasAllowReassociation())
13783SDValue Extract =
N->getOperand(ReduceIdx);
13795// Make sure that ScalarV is a splat with VL=1. 13804// Check the scalar of ScalarV is neutral element 13805// TODO: Deal with value other than neutral element. 13810// If the AVL is zero, operand 0 will be returned. So it's not safe to fold. 13811// FIXME: We might be able to improve this if operand 0 is undef. 13815SDValue NewStart =
N->getOperand(1 - ReduceIdx);
13822// If we looked through an INSERT_SUBVECTOR we need to restore it. 13837// Optimize (add (shl x, c0), (shl y, c1)) -> 13838// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 13841// Perform this optimization only in the zba extension. 13842if (!Subtarget.hasStdExtZba())
13845// Skip for vector types and larger types. 13846EVT VT =
N->getValueType(0);
13850// The two operand nodes must be SHL and have no other use. 13858auto *N0C = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
13859auto *N1C = dyn_cast<ConstantSDNode>(N1->
getOperand(1));
13862 int64_t C0 = N0C->getSExtValue();
13863 int64_t C1 = N1C->getSExtValue();
13864if (C0 <= 0 || C1 <= 0)
13867// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 13868 int64_t Bits = std::min(C0, C1);
13869 int64_t Diff = std::abs(C0 - C1);
13870if (Diff != 1 && Diff != 2 && Diff != 3)
13882// Combine a constant select operand into its use: 13884// (and (select cond, -1, c), x) 13885// -> (select cond, x, (and x, c)) [AllOnes=1] 13886// (or (select cond, 0, c), x) 13887// -> (select cond, x, (or x, c)) [AllOnes=0] 13888// (xor (select cond, 0, c), x) 13889// -> (select cond, x, (xor x, c)) [AllOnes=0] 13890// (add (select cond, 0, c), x) 13891// -> (select cond, x, (add x, c)) [AllOnes=0] 13892// (sub x, (select cond, 0, c)) 13893// -> (select cond, x, (sub x, c)) [AllOnes=0] 13897EVT VT =
N->getValueType(0);
13904// (select cond, x, (and x, c)) has custom lowering with Zicond. 13905if ((!Subtarget.hasStdExtZicond() &&
13906 !Subtarget.hasVendorXVentanaCondOps()) ||
13910// Maybe harmful when condition code has multiple use. 13914// Maybe harmful when VT is wider than XLen. 13934 SwapSelectOps =
false;
13935 NonConstantVal = FalseVal;
13937 SwapSelectOps =
true;
13938 NonConstantVal = TrueVal;
13942// Slct is now know to be the desired identity constant when CC is true. 13944 FalseVal = DAG.
getNode(
N->getOpcode(),
SDLoc(
N), VT, OtherOp, NonConstantVal);
13945// Unless SwapSelectOps says the condition should be false. 13958// Attempt combineSelectAndUse on each operand of a commutative operator N. 13971// Transform (add (mul x, c0), c1) -> 13972// (add (mul (add x, c1/c0), c0), c1%c0). 13973// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 13974// that should be excluded is when c0*(c1/c0) is simm12, which will lead 13975// to an infinite loop in DAGCombine if transformed. 13976// Or transform (add (mul x, c0), c1) -> 13977// (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 13978// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner 13979// case that should be excluded is when c0*(c1/c0+1) is simm12, which will 13980// lead to an infinite loop in DAGCombine if transformed. 13981// Or transform (add (mul x, c0), c1) -> 13982// (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 13983// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner 13984// case that should be excluded is when c0*(c1/c0-1) is simm12, which will 13985// lead to an infinite loop in DAGCombine if transformed. 13986// Or transform (add (mul x, c0), c1) -> 13987// (mul (add x, c1/c0), c0). 13988// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 13991// Skip for vector types and larger types. 13992EVT VT =
N->getValueType(0);
13995// The first operand node must be a MUL and has no other use. 13999// Check if c0 and c1 match above conditions. 14000auto *N0C = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
14001auto *N1C = dyn_cast<ConstantSDNode>(
N->getOperand(1));
14004// If N0C has multiple uses it's possible one of the cases in 14005// DAGCombiner::isMulAddWithConstProfitable will be true, which would result 14006// in an infinite loop. 14007if (!N0C->hasOneUse())
14009 int64_t C0 = N0C->getSExtValue();
14010 int64_t C1 = N1C->getSExtValue();
14012if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14014// Search for proper CA (non-zero) and CB that both are simm12. 14015if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14016 !isInt<12>(C0 * (C1 / C0))) {
14019 }
elseif ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14020 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14023 }
elseif ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14024 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
// Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
// add (zext, zext) -> zext (add (zext, zext))
// sub (zext, zext) -> sext (sub (zext, zext))
// mul (zext, zext) -> zext (mul (zext, zext))
// sdiv (zext, zext) -> zext (sdiv (zext, zext))
// udiv (zext, zext) -> zext (udiv (zext, zext))
// srem (zext, zext) -> zext (srem (zext, zext))
// urem (zext, zext) -> zext (urem (zext, zext))
// where the sum of the extend widths match, and the range of the bin op
// fits inside the width of the narrower bin op. (For profitability on rvv, we
// use a power of two for both inner and outer extend.)
EVT VT =
N->getValueType(0);
14077// Src0 and Src1 are zero extended, so they're always positive if signed. 14079// sub can produce a negative from two positive operands, so it needs sign 14080// extended. Other nodes produce a positive from two positive operands, so 14081// zero extend instead. 14082unsigned OuterExtend =
14086 OuterExtend,
SDLoc(
N), VT,
14090// Try to turn (add (xor bool, 1) -1) into (neg bool). 14094EVT VT =
N->getValueType(0);
14097// RHS should be -1. 14101// Look for (xor X, 1). 14105// First xor input should be 0 or 1. 14110// Emit a negate of the setcc. 14133// fold (add (select lhs, rhs, cc, 0, y), x) -> 14134// (select lhs, rhs, cc, x, (add x, y)) 14138// Try to turn a sub boolean RHS and constant LHS into an addi. 14142EVT VT =
N->getValueType(0);
14145// Require a constant LHS. 14146auto *N0C = dyn_cast<ConstantSDNode>(N0);
14150// All our optimizations involve subtracting 1 from the immediate and forming 14151// an ADDI. Make sure the new immediate is valid for an ADDI. 14152APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14158// (sub constant, (setcc x, y, eq/neq)) -> 14159// (add (setcc x, y, neq/eq), constant - 1) 14162if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.
isInteger())
14169// (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 14170// Since setcc returns a bool the xor is equivalent to 1-setcc. 14179// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is 14180// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X) 14181// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is 14182// valid with Y=3, while 0b0000_1000_0000_0100 is not. 14185if (!Subtarget.hasStdExtZbb())
14188EVT VT =
N->getValueType(0);
14190if (VT != Subtarget.
getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14199auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
14202unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14204if (ShiftedAmount >= 8)
14208SDValue RightShiftOperand = N1;
14210if (ShiftedAmount != 0) {
// Right operand must be a right shift. 14213auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.
getOperand(1));
14214if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14219// At least one shift should have a single use. 14223if (LeftShiftOperand != RightShiftOperand)
14227 Mask <<= ShiftedAmount;
14228// Check that X has indeed the right shape (only the Y-th bit can be set in 14241EVT VT =
N->getValueType(0);
14244// fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) 14261// fold (sub x, (select lhs, rhs, cc, 0, y)) -> 14262// (select lhs, rhs, cc, x, (sub x, y)) 14266// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 14267// Legalizing setcc can introduce xors like this. Doing this transform reduces 14268// the number of xors and may allow the xor to fold into a branch condition. 14283// For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 14284// (xor X, -1) based on the upper bits of the other operand being 0. If the 14285// operation is And, allow one of the Xors to use -1. 14290// N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 14296EVT VT =
N->getValueType(0);
14301// The LHS of the xors needs to be 0/1. 14306// Invert the opcode and insert a new xor. 14313// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to 14314// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed 14315// value to an unsigned value. This will be lowered to vmax and series of 14316// vnclipu instructions later. This can be extended to other truncated types 14317// other than i8 by replacing 256 and 255 with the equivalent constants for the 14320EVT VT =
N->getValueType(0);
14338// FIXME: Support the version of this pattern with the select operands 14347if (CondLHS != True)
14352// FIXME: Support other constants. 14354if (!CondRHSC || CondRHSC->
getAPIntValue() != (1ULL << ScalarBits))
14366if (!FalseRHSC || !FalseRHSC->
isZero())
14373// Emit the signed to unsigned saturation pattern. 14386EVT VT =
N->getValueType(0);
14388// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 14389// extending X. This is safe since we only need the LSB after the shift and 14390// shift amounts larger than 31 would produce poison. If we wait until 14391// type legalization, we'll create RISCVISD::SRLW and we can't recover it 14392// to use a BEXT instruction. 14393if (Subtarget.
is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14406// Combines two comparison operation and logic operation to one selection 14407// operation(min, max) and logic operation. Returns new constructed Node if 14408// conditions for optimization are satisfied. 14415// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero 14416// extending X. This is safe since we only need the LSB after the shift and 14417// shift amounts larger than 31 would produce poison. If we wait until 14418// type legalization, we'll create RISCVISD::SRLW and we can't recover it 14419// to use a BEXT instruction. 14420if (Subtarget.
is64Bit() && Subtarget.hasStdExtZbs() &&
14442// fold (and (select lhs, rhs, cc, -1, y), x) -> 14443// (select lhs, rhs, cc, x, (and x, y)) 14447// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. 14448// FIXME: Generalize to other binary operators with same operand. 14458// Should have the same condition. 14472EVT VT =
N->getValueType(0);
14496// Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom. 14497// We may be able to pull a common operation out of the true and false value. 14505// fold (or (select cond, 0, y), x) -> 14506// (select cond, x, (or x, y)) 14515// Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use 14516// (ADDI (BSET X0, X), -1). If we wait until/ type legalization, we'll create 14517// RISCVISD:::SLLW and we can't recover it to use a BSET instruction. 14518if (Subtarget.
is64Bit() && Subtarget.hasStdExtZbs() &&
14530// fold (xor (sllw 1, x), -1) -> (rolw ~1, x) 14531// NOTE: Assumes ROL being legal means ROLW is legal. 14541// Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) 14543auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.
getOperand(0));
14548constAPInt &Imm = ConstN00->getAPIntValue();
14549if ((Imm + 1).isSignedIntN(12))
14560// fold (xor (select cond, 0, y), x) -> 14561// (select cond, x, (xor x, y)) 14565// Try to expand a scalar multiply to a faster sequence. 14570EVT VT =
N->getValueType(0);
14572// LI + MUL is usually smaller than the alternative sequence. 14582constbool HasShlAdd =
14583 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14590// WARNING: The code below is knowingly incorrect with regards to undef semantics. 14591// We're adding additional uses of X here, and in principle, we should be freezing 14592// X before doing so. However, adding freeze here causes real regressions, and no 14593// other target properly freezes X in these cases either. 14597for (
uint64_t Divisor : {3, 5, 9}) {
14598if (MulAmt % Divisor != 0)
14600uint64_t MulAmt2 = MulAmt / Divisor;
14601// 3/5/9 * 2^N -> shl (shXadd X, X), N 14605// Put the shift first if we can fold a zext into the 14606// shift forming a slli.uw. 14607if (
X.getOpcode() ==
ISD::AND && isa<ConstantSDNode>(
X.getOperand(1)) &&
14608X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
// Otherwise, put the shl second so that it can fold with following
// instructions (e.g. sext or add).
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
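// e.g. (illustrative): MulAmt == 45 == 9 * 5 becomes
// (sh2add (sh3add X, X), (sh3add X, X)), i.e. 5 * (9 * X).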
14636// If this is a power 2 + 2/4/8, we can use a shift followed by a single 14637// shXadd. First check if this a sum of two power of 2s because that's 14638// easy. Then count how many zeros are up to the first bit. 14641if (ScaleShift >= 1 && ScaleShift < 4) {
14642unsigned ShiftAmt =
Log2_64((MulAmt & (MulAmt - 1)));
14651// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) 14652// This is the two instruction form, there are also three instruction 14653// variants we could implement. e.g. 14654// (2^(1,2,3) * 3,5,9 + 1) << C2 14655// 2^(C1>3) * 3,5,9 +/- 1 14656for (
uint64_t Divisor : {3, 5, 9}) {
14661if ((
C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14671// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) 14672if (MulAmt > 2 &&
isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14674if (ScaleShift >= 1 && ScaleShift < 4) {
14675unsigned ShiftAmt =
Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14685// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x)) 14700// 2^N - 2^M -> (sub (shl X, C1), (shl X, C2)) 14701uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14703uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14714for (
uint64_t Divisor : {3, 5, 9}) {
14715if (MulAmt % Divisor != 0)
14717uint64_t MulAmt2 = MulAmt / Divisor;
14718// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples 14719// of 25 which happen to be quite common. 14720for (
uint64_t Divisor2 : {3, 5, 9}) {
14721if (MulAmt2 % Divisor2 != 0)
14723uint64_t MulAmt3 = MulAmt2 / Divisor2;
14742// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) -> 14743// (bitcast (sra (v2Xi16 (bitcast X)), 15)) 14744// Same for other equivalent types with other equivalent constants. 14746EVT VT =
N->getValueType(0);
14749// Do this for legal vectors unless they are i1 or i8 vectors. 14753if (
N->getOperand(0).getOpcode() !=
ISD::AND ||
14754N->getOperand(0).getOperand(0).getOpcode() !=
ISD::SRL)
14767if (!V1.
isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14768 V3 != (HalfSize - 1))
14784EVT VT =
N->getValueType(0);
14794// vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) 14795// (mul x, add (y, 1)) -> (add x, (mul x, y)) 14796// vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) 14797// (mul x, (sub 1, y)) -> (sub x, (mul x, y)) 14798auto IsAddSubWith1 = [&](
SDValue V) ->
bool {
14799 AddSubOpc = V->getOpcode();
14801SDValue Opnd = V->getOperand(1);
14802 MulOper = V->getOperand(0);
14811if (IsAddSubWith1(N0)) {
14813return DAG.
getNode(AddSubOpc,
DL, VT, N1, MulVal);
14816if (IsAddSubWith1(N1)) {
14818return DAG.
getNode(AddSubOpc,
DL, VT, N0, MulVal);
14830/// According to the property that indexed load/store instructions zero-extend 14831/// their indices, try to narrow the type of index operand. 14833if (isIndexTypeSigned(IndexType))
14836if (!
N->hasOneUse())
14839EVT VT =
N.getValueType();
14842// In general, what we're doing here is seeing if we can sink a truncate to 14843// a smaller element type into the expression tree building our index. 14844// TODO: We can generalize this and handle a bunch more cases if useful. 14846// Narrow a buildvector to the narrowest element type. This requires less 14847// work and less register pressure at high LMUL, and creates smaller constants 14848// which may be cheaper to materialize. 14861// Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). 14878EVT SrcVT = Src.getValueType();
14882 NewElen = std::max(NewElen, 8U);
14884// Skip if NewElen is not narrower than the original extended type. 14897// Replace (seteq (i64 (and X, 0xffffffff)), C1) with 14898// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 14899// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 14900// can become a sext.w instead of a shift pair. 14905EVT VT =
N->getValueType(0);
14908if (OpVT != MVT::i64 || !Subtarget.
is64Bit())
14911// RHS needs to be a constant. 14912auto *N1C = dyn_cast<ConstantSDNode>(N1);
14916// LHS needs to be (and X, 0xffffffff). 14922// Looking for an equality compare. 14924if (!isIntEqualitySetCC(
Cond))
14927// Don't do this if the sign bit is provably zero, it will be turned back into 14933constAPInt &C1 = N1C->getAPIntValue();
14936// If the constant is larger than 2^32 - 1 it is impossible for both sides 14951EVT VT =
N->getValueType(0);
14952EVT SrcVT = cast<VTSDNode>(
N->getOperand(1))->
getVT();
14953unsigned Opc = Src.getOpcode();
14955// Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 14956// Don't do this with Zhinx. We need to explicitly sign extend the GPR. 14958 Subtarget.hasStdExtZfhmin())
14960 Src.getOperand(0));
14962// Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32 14964 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14967 Src.getOperand(1));
14973// Forward declaration of the structure holding the necessary information to 14975structCombineResult;
14977enum ExtKind :
uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14978/// Helper class for folding sign/zero extensions. 14979/// In particular, this class is used for the following combines: 14980/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w 14981/// sub | sub_vl -> vwsub(u) | vwsub(u)_w 14982/// mul | mul_vl -> vwmul(u) | vwmul_su 14983/// shl | shl_vl -> vwsll 14984/// fadd -> vfwadd | vfwadd_w 14985/// fsub -> vfwsub | vfwsub_w 14987/// An object of this class represents an operand of the operation we want to 14989/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 14990/// NodeExtensionHelper for `a` and one for `b`. 14992/// This class abstracts away how the extension is materialized and 14993/// how its number of users affect the combines. 14996/// - VWADD_W is conceptually == add(op0, sext(op1)) 14997/// - VWADDU_W == add(op0, zext(op1)) 14998/// - VWSUB_W == sub(op0, sext(op1)) 14999/// - VWSUBU_W == sub(op0, zext(op1)) 15000/// - VFWADD_W == fadd(op0, fpext(op1)) 15001/// - VFWSUB_W == fsub(op0, fpext(op1)) 15002/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 15003/// zext|sext(smaller_value). 15004structNodeExtensionHelper {
15005 /// Records if this operand is like being zero extended. 15007 /// Records if this operand is like being sign extended. 15008 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 15009 /// instance, a splat constant (e.g., 3), would support being both sign and 15012 /// Records if this operand is like being floating-Point extended. 15014 /// This boolean captures whether we care if this operand would still be 15015 /// around after the folding happens. 15017 /// Original value that this NodeExtensionHelper represents. 15020 /// Get the value feeding the extension or the value itself. 15021 /// E.g., for zext(a), this would return a. 15035 /// Check if this instance represents a splat. 15041 /// Get the extended opcode. 15042unsigned getExtOpc(ExtKind SupportsExt)
const{
15043switch (SupportsExt) {
15048case ExtKind::FPExt:
  /// Get or create a value that can feed \p Root with the given extension \p
  /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
  /// operand. \see ::getSource().
                            std::optional<ExtKind> SupportsExt) const {
15060if (!SupportsExt.has_value())
15063MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15067if (
Source.getValueType() == NarrowVT)
15070// vfmadd_vl -> vfwmadd_vl can take bf16 operands 15071if (
Source.getValueType().getVectorElementType() == MVT::bf16) {
15077unsigned ExtOpc = getExtOpc(*SupportsExt);
15079// If we need an extension, we should be changing the type. 15081auto [
Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15088return DAG.
getNode(ExtOpc,
DL, NarrowVT, Source, Mask, VL);
15100 DAG.
getUNDEF(NarrowVT), Source, VL);
    // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
    // and that operand should already have the right NarrowVT so no
    // extension should be required at this point.

  /// Helper function to get the narrow type for \p Root.
  /// The narrow type is the type of \p Root where we divided the size of each
  /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
  /// \pre Both the narrow type and the original type should be legal.
  static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
    // Determine the narrow size.
    MVT EltVT = SupportsExt == ExtKind::FPExt
                    ? MVT::getFloatingPointVT(NarrowSize)
                    : MVT::getIntegerVT(NarrowSize);
    assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
           "Trying to extend something we can't represent");
  /// Get the opcode to materialize:
  /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
  static unsigned getSExtOpcode(unsigned Opcode) {

  /// Get the opcode to materialize:
  /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
  static unsigned getZExtOpcode(unsigned Opcode) {

  /// Get the opcode to materialize:
  /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
  static unsigned getFPExtOpcode(unsigned Opcode) {
  /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getSUOpcode(unsigned Opcode) {
           "SU is only supported for MUL");
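  // Illustrative sketch (assumption): scalar model of the "SU" form produced
  // here, i.e. Opcode(sext(a), zext(b)) for MUL, which lowers to vwmulsu.
  // Shown for i8 -> i16; the helper name is hypothetical.
  static int16_t modelVWMulSU(int8_t A, uint8_t B) {
    return int16_t(int16_t(A) * uint16_t(B)); // vwmulsu: mul(sext(a), zext(b))
  }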
  /// Get the opcode to materialize
  /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
  /// Check if this node needs to be fully folded or extended for all users.
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }
           "Unexpected Opcode");
    // The passthru must be undef for tail agnostic.
    // Get the scalar value.
    // See if we have enough sign bits or zero bits in the scalar to use a
    // widening opcode by splatting to a smaller element size.
    unsigned ScalarBits = Op.getValueSizeInBits();
    // If we're not getting all bits from the element, we need special handling.
    if (ScalarBits < EltBits) {
      // This should only occur on RV32.
             !Subtarget.is64Bit() && "Unexpected splat");
      // vmv.v.x sign extends narrow inputs.
      SupportsSExt = true;
      // If the input is positive, then sign extend is also zero extend.
        SupportsZExt = true;
      EnforceOneUse = false;
15277unsigned NarrowSize = EltBits / 2;
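    // Illustrative sketch (assumption): the reasoning a few lines above. For a
    // non-negative narrow value the sign extension and the zero extension
    // agree, which is why SupportsZExt can be set alongside SupportsSExt.
    // The helper name is hypothetical.
    static bool sextEqualsZextWhenNonNegative(int32_t V) {
      if (V < 0)
        return true; // Only the non-negative case is claimed.
      return int64_t(V) == int64_t(uint64_t(uint32_t(V)));
    }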
15278// If the narrow type cannot be expressed with a legal VMV, 15279// this is not a valid candidate. 15284 SupportsSExt =
true;
15288 SupportsZExt =
true;
15290 EnforceOneUse =
false;
  bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
    // Any f16 extension will need zvfh.
    // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
    // Zvfbfwma.
    if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||

  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
    SupportsZExt = false;
    SupportsSExt = false;
    SupportsFPExt = false;
    EnforceOneUse = true;

    // For the nodes we handle below, we end up using their inputs directly: see
    // getSource(). However since they either don't have a passthru or we check
    // that their passthru is undef, we can safely ignore their mask and VL.

    // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
      SupportsZExt = true;
15339 SupportsSExt =
true;
15344if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15346 SupportsFPExt =
true;
15351 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15363if (!isSupportedFPExtend(Root,
Op.getOperand(0).getSimpleValueType(),
15368unsigned ScalarBits =
Op.getOperand(0).getValueSizeInBits();
15369if (NarrowSize != ScalarBits)
15372 SupportsFPExt =
true;
15380 /// Check if \p Root supports any extension folding combines. 15381staticbool isSupportedRoot(
constSDNode *Root,
15393// Vector Widening Integer Add/Sub/Mul Instructions 15401// Vector Widening Floating-Point Add/Sub/Mul Instructions 15410 Subtarget.hasStdExtZvbb();
15412return Subtarget.hasStdExtZvbb();
  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build a helper with an unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15436// VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS)) 15437// VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS)) 15438// VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS)) 15445if (OperandIdx == 1) {
15452// There's no existing extension here, so we don't have to worry about 15453// making sure it gets removed. 15454 EnforceOneUse =
false;
15459 fillUpExtensionSupport(Root, DAG, Subtarget);
15464 /// Helper function to get the Mask and VL from \p Root. 15465static std::pair<SDValue, SDValue>
15468assert(isSupportedRoot(Root, Subtarget) &&
"Unexpected root");
15484 /// Helper function to check if \p N is commutative with respect to the 15485 /// foldings that are supported by this class. 15487switch (
N->getOpcode()) {
  /// Get a list of combines to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that, if not
  /// None, needs to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.

/// Helper structure that holds all the necessary information to materialize a
/// combine that does some extension folding.
struct CombineResult {
  /// Opcode to be generated when materializing the combine.
  unsigned TargetOpcode;
  // No value means no extension is needed.
  std::optional<ExtKind> LHSExt;
  std::optional<ExtKind> RHSExt;
  /// Root of the combine.
  /// LHS of the TargetOpcode.
  NodeExtensionHelper LHS;
  /// RHS of the TargetOpcode.
  NodeExtensionHelper RHS;
15542 CombineResult(
unsigned TargetOpcode,
SDNode *Root,
15543const NodeExtensionHelper &
LHS, std::optional<ExtKind> LHSExt,
15544const NodeExtensionHelper &
RHS, std::optional<ExtKind> RHSExt)
15545 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15548 /// Return a value that uses TargetOpcode and that can be used to replace 15550 /// The actual replacement is *not* done in that method. 15554 std::tie(Mask, VL) =
15555 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15569LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15570RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15571 Passthru, Mask, VL);
/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
/// AllowExtMask defines which forms `ext` can take in this pattern.
///
/// \note If the pattern can match with both zext and sext, the returned
/// CombineResult will feature the zext result.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
15586canFoldToVWWithSameExtensionImpl(
SDNode *Root,
const NodeExtensionHelper &LHS,
15587const NodeExtensionHelper &RHS,
15590if ((AllowExtMask & ExtKind::ZExt) &&
LHS.SupportsZExt &&
RHS.SupportsZExt)
15591return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->
getOpcode()),
15592 Root, LHS,
/*LHSExt=*/{ExtKind::ZExt}, RHS,
15593/*RHSExt=*/{ExtKind::ZExt});
15594if ((AllowExtMask & ExtKind::SExt) &&
LHS.SupportsSExt &&
RHS.SupportsSExt)
15595return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->
getOpcode()),
15596 Root, LHS,
/*LHSExt=*/{ExtKind::SExt}, RHS,
15597/*RHSExt=*/{ExtKind::SExt});
15598if ((AllowExtMask & ExtKind::FPExt) &&
LHS.SupportsFPExt &&
RHS.SupportsFPExt)
15599return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->
getOpcode()),
15600 Root, LHS,
/*LHSExt=*/{ExtKind::FPExt}, RHS,
15601/*RHSExt=*/{ExtKind::FPExt});
15602return std::nullopt;
15605/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 15606/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 15607/// are zext) and LHS and RHS can be folded into Root. 15609/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15610/// can be used to apply the pattern. 15611static std::optional<CombineResult>
15612canFoldToVWWithSameExtension(
SDNode *Root,
const NodeExtensionHelper &LHS,
15615return canFoldToVWWithSameExtensionImpl(
15616 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15620/// Check if \p Root follows a pattern Root(LHS, ext(RHS)) 15622/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15623/// can be used to apply the pattern. 15624static std::optional<CombineResult>
15625canFoldToVW_W(
SDNode *Root,
const NodeExtensionHelper &LHS,
15628if (
RHS.SupportsFPExt)
15629return CombineResult(
15630 NodeExtensionHelper::getWOpcode(Root->
getOpcode(), ExtKind::FPExt),
15631 Root, LHS,
/*LHSExt=*/std::nullopt, RHS,
/*RHSExt=*/{ExtKind::FPExt});
15633// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar 15635// Control this behavior behind an option (AllowSplatInVW_W) for testing 15638return CombineResult(
15639 NodeExtensionHelper::getWOpcode(Root->
getOpcode(), ExtKind::ZExt), Root,
15640 LHS,
/*LHSExt=*/std::nullopt, RHS,
/*RHSExt=*/{ExtKind::ZExt});
15642return CombineResult(
15643 NodeExtensionHelper::getWOpcode(Root->
getOpcode(), ExtKind::SExt), Root,
15644 LHS,
/*LHSExt=*/std::nullopt, RHS,
/*RHSExt=*/{ExtKind::SExt});
15645return std::nullopt;
15648/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 15650/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15651/// can be used to apply the pattern. 15652static std::optional<CombineResult>
15653canFoldToVWWithSEXT(
SDNode *Root,
const NodeExtensionHelper &LHS,
15656return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15660/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 15662/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15663/// can be used to apply the pattern. 15664static std::optional<CombineResult>
15665canFoldToVWWithZEXT(
SDNode *Root,
const NodeExtensionHelper &LHS,
15668return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15672/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS)) 15674/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15675/// can be used to apply the pattern. 15676static std::optional<CombineResult>
15677canFoldToVWWithFPEXT(
SDNode *Root,
const NodeExtensionHelper &LHS,
15680return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15684/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) 15686/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15687/// can be used to apply the pattern. 15688static std::optional<CombineResult>
15689canFoldToVW_SU(
SDNode *Root,
const NodeExtensionHelper &LHS,
15693if (!
LHS.SupportsSExt || !
RHS.SupportsZExt)
15694return std::nullopt;
15695return CombineResult(NodeExtensionHelper::getSUOpcode(Root->
getOpcode()),
15696 Root, LHS,
/*LHSExt=*/{ExtKind::SExt}, RHS,
15697/*RHSExt=*/{ExtKind::ZExt});
15701NodeExtensionHelper::getSupportedFoldings(
constSDNode *Root) {
15711// add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub 15712 Strategies.
push_back(canFoldToVWWithSameExtension);
15713// add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w 15721 Strategies.
push_back(canFoldToVWWithSameExtension);
15726 Strategies.
push_back(canFoldToVWWithSameExtension);
15733 Strategies.
push_back(canFoldToVWWithZEXT);
15737// vwadd_w|vwsub_w -> vwadd|vwsub 15738 Strategies.
push_back(canFoldToVWWithSEXT);
15742// vwaddu_w|vwsubu_w -> vwaddu|vwsubu 15743 Strategies.
push_back(canFoldToVWWithZEXT);
15747// vfwadd_w|vfwsub_w -> vfwadd|vfwsub 15748 Strategies.
push_back(canFoldToVWWithFPEXT);
15755}
// End anonymous namespace.

/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
/// shl | shl_vl -> vwsll
/// fadd_vl -> vfwadd | vfwadd_w
/// fsub_vl -> vfwsub | vfwsub_w
/// fmul_vl -> vfwmul
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
/// vfwadd_w -> vfwadd
/// vfwsub_w -> vfwsub
  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15783 Inserted.insert(
N);
15786while (!Worklist.
empty()) {
15789 NodeExtensionHelper
LHS(Root, 0, DAG, Subtarget);
15790 NodeExtensionHelper
RHS(Root, 1, DAG, Subtarget);
15791auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15792 &Inserted](
const NodeExtensionHelper &
Op) {
15793if (
Op.needToPromoteOtherUsers()) {
15796if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15798// We only support the first 2 operands of FMA. 15801if (Inserted.insert(TheUser).second)
15808// Control the compile time by limiting the number of node we look at in 15814 NodeExtensionHelper::getSupportedFoldings(Root);
15816assert(!FoldingStrategies.
empty() &&
"Nothing to be folded");
15817bool Matched =
false;
15818for (
int Attempt = 0;
15819 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15822for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15823 FoldingStrategies) {
15824 std::optional<CombineResult> Res =
15825 FoldingStrategy(Root,
LHS,
RHS, DAG, Subtarget);
        // All the inputs that are extended need to be folded, otherwise
        // we would be leaving the old input (since it may still be used),
        if (Res->LHSExt.has_value())
15833if (!AppendUsersIfNeeded(
LHS))
15835if (Res->RHSExt.has_value())
15836if (!AppendUsersIfNeeded(
RHS))
15843// Right now we do an all or nothing approach. 15847// Store the value for the replacement of the input node separately. 15849// We do the RAUW after we materialize all the combines, because some replaced 15850// nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, 15851// some of these nodes may appear in the NodeExtensionHelpers of some of the 15852// yet-to-be-visited CombinesToApply roots. 15855for (CombineResult Res : CombinesToApply) {
15856SDValue NewValue = Res.materialize(DAG, Subtarget);
15857if (!InputRootReplacement) {
15859"First element is expected to be the current node");
15860 InputRootReplacement = NewValue;
15865for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15869return InputRootReplacement;
// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
//      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
// y will be the Passthru and cond will be the Mask.
  unsigned Opc = N->getOpcode();
15882unsigned MergeOpc = MergeOp.
getOpcode();
15892// Passthru should be undef 15893SDValue Passthru =
N->getOperand(2);
  // Mask should be all ones.
  // False value of MergeOp should be all zeros.
    Z = Z.getOperand(1);
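  // Illustrative sketch (assumption): scalar justification for the fold
  // above. Moving the vmerge into the widening op's mask relies on
  //   y + (cond ? x : 0) == (cond ? y + x : y)
  // per lane, with y reused as the passthru for the masked-off lanes.
  // The helper name is hypothetical.
  static bool mergeFoldHolds(bool Cond, int32_t X, int32_t Y) {
    int64_t Unfolded = int64_t(Y) + (Cond ? int64_t(X) : 0);
    int64_t Folded = Cond ? int64_t(Y) + int64_t(X) : int64_t(Y);
    return Unfolded == Folded;
  }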
15913 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15920 [[maybe_unused]]
unsigned Opc =
N->getOpcode();
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
  // The new operation has twice the width.
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15955auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15957if (MemVT == MVT::i32)
15963 Opcode,
SDLoc(LSNode1), DAG.
getVTList({XLenVT, XLenVT, MVT::Other}),
// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension.
  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())

  // No volatile, indexed or atomic loads/stores.

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16012if (
auto *C1 = dyn_cast<ConstantSDNode>(
Ptr->getOperand(1)))
16013return {
Ptr->getOperand(0), C1->getZExtValue()};
16017auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->
getOperand(OpNum));
16025// No volatile, indexed or atomic loads/stores. 16029// Check if LSNode1 and LSNode2 have the same type and extension. 16038auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->
getOperand(OpNum));
  // Check if the base pointer is the same for both instructions.

  // Check if the offsets match the XTHeadMemPair encoding constraints.
  if (MemVT == MVT::i32) {
    // Check for adjacent i32 values and a 2-bit index.
    if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
  } else if (MemVT == MVT::i64) {
    // Check for adjacent i64 values and a 2-bit index.
    if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X))     -> fcvt X, rtz
// (fp_to_int (ffloor X))     -> fcvt X, rdn
// (fp_to_int (fceil X))      -> fcvt X, rup
// (fp_to_int (fround X))     -> fcvt X, rmm
// (fp_to_int (frint X))      -> fcvt X
  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode())
16089// Ensure the FP type is legal. 16093// Don't do this for f16 with Zfhmin and not Zfh. 16094if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16098// If the result is invalid, we didn't find a foldable instruction. 16104EVT VT =
N->getValueType(0);
16107MVT SrcVT = Src.getSimpleValueType();
16108MVT SrcContainerVT = SrcVT;
16110SDValue XVal = Src.getOperand(0);
16112// For widening and narrowing conversions we just combine it into a 16113// VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They 16114// end up getting lowered to their appropriate pseudo instructions based on 16115// their operand types 16120// Make fixed-length vectors scalable first 16133// Use the dedicated trunc static rounding mode if we're truncating so we 16134// don't need to generate calls to fsrmi/fsrm 16137 FpToInt = DAG.
getNode(Opc,
DL, ContainerVT, XVal, Mask, VL);
16141 FpToInt = DAG.
getNode(Opc,
DL, ContainerVT, XVal, Mask,
16145// If converted from fixed-length to scalable, convert back 16152// Only handle XLen or i32 types. Other types narrower than XLen will 16153// eventually be legalized to XLenVT. 16154if (VT != MVT::i32 && VT != XLenVT)
16169// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 16170// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 16171// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 16172// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 16173// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 16174// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) 16182// Only handle XLen types. Other types narrower than XLen will eventually be 16183// legalized to XLenVT. 16184EVT DstVT =
N->getValueType(0);
16185if (DstVT != XLenVT)
16190// Don't do this for strict-fp Src. 16191if (Src->isStrictFPOpcode())
16194// Ensure the FP type is also legal. 16198// Don't do this for f16 with Zfhmin and not Zfh. 16199if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16202EVT SatVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
16213elseif (DstVT == MVT::i64 && SatVT == MVT::i32)
16217// FIXME: Support other SatVTs by clamping before or after the conversion. 16219 Src = Src.getOperand(0);
16225// fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 16230// RISC-V FP-to-int conversions saturate to the destination register size, but 16231// don't produce 0 for nan. 16236// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 16237// smaller than XLenVT. 16240assert(Subtarget.hasStdExtZbkb() &&
"Unexpected extension");
16246EVT VT =
N->getValueType(0);
16258// vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK) 16260// Check if its first operand is a vp.load. 16261auto *VPLoad = dyn_cast<VPLoadSDNode>(
N->getOperand(0));
16265EVT LoadVT = VPLoad->getValueType(0);
16266// We do not have a strided_load version for masks, and the evl of vp.reverse 16267// and vp.load should always be the same. 16269N->getOperand(2) != VPLoad->getVectorLength() ||
16270 !
N->getOperand(0).hasOneUse())
16273// Check if the mask of outer vp.reverse are all 1's. 16277SDValue LoadMask = VPLoad->getMask();
16278// If Mask is all ones, then load is unmasked and can be reversed. 16280// If the mask is not all ones, we can reverse the load if the mask was also 16281// reversed by an unmasked vp.reverse with the same EVL. 16282if (LoadMask.
getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16284 LoadMask.
getOperand(2) != VPLoad->getVectorLength())
16289// Base = LoadAddr + (NumElem - 1) * ElemWidthByte 16292SDValue NumElem = VPLoad->getVectorLength();
16293uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16305 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16309 LoadVT,
DL, VPLoad->getChain(),
Base, Stride, LoadMask,
16310 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16320// vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR, 16322auto *VPStore = cast<VPStoreSDNode>(
N);
16324if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16327SDValue VPReverse = VPStore->getValue();
16330// We do not have a strided_store version for masks, and the evl of vp.reverse 16331// and vp.store should always be the same. 16333 VPStore->getVectorLength() != VPReverse.
getOperand(2) ||
16337SDValue StoreMask = VPStore->getMask();
16338// If Mask is all ones, then load is unmasked and can be reversed. 16340// If the mask is not all ones, we can reverse the store if the mask was 16341// also reversed by an unmasked vp.reverse with the same EVL. 16342if (StoreMask.
getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16344 StoreMask.
getOperand(2) != VPStore->getVectorLength())
16349// Base = StoreAddr + (NumElem - 1) * ElemWidthByte 16352SDValue NumElem = VPStore->getVectorLength();
16366 PtrInfo, VPStore->getMemOperand()->getFlags(),
16371 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16372 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16373 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16376// Convert from one FMA opcode to another based on whether we are negating the 16377// multiply result and/or the accumulator. 16378// NOTE: Only supports RVV operations with VL. 16380// Negating the multiply result changes ADD<->SUB and toggles 'N'. 16397// Negating the accumulator changes ADD<->SUB. 16418// Fold FNEG_VL into FMA opcodes. 16419// The first operand of strict-fp is chain. 16422unsignedOffset = IsStrict ? 1 : 0;
16429auto invertIfNegative = [&Mask, &VL](
SDValue &V) {
16431 V.getOperand(2) == VL) {
16432// Return the negated input. 16433 V = V.getOperand(0);
16440bool NegA = invertIfNegative(
A);
16441bool NegB = invertIfNegative(
B);
16442bool NegC = invertIfNegative(
C);
16444// If no operands are negated, we're done. 16445if (!NegA && !NegB && !NegC)
16451 {N->getOperand(0), A, B, C, Mask, VL});
16464// FIXME: Ignore strict opcodes for now. 16475EVT VT =
N->getValueType(0);
16480if (!isa<ConstantSDNode>(
N->getOperand(1)))
16482uint64_t ShAmt =
N->getConstantOperandVal(1);
16486// Combine (sra (sext_inreg (shl X, C1), iX), C2) -> 16487// (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI. 16490 cast<VTSDNode>(N0.
getOperand(1))->getVT().getSizeInBits();
16495if (LShAmt < ExtSize) {
16508if (ShAmt > 32 || VT != MVT::i64)
16511// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) 16512// FIXME: Should this be a generic combine? There's a similar combine on X86. 16514// Also try these folds where an add or sub is in the middle. 16515// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) 16516// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) 16520// We might have an ADD or SUB between the SRA and SHL. 16523// Other operand needs to be a constant we can modify. 16524 AddC = dyn_cast<ConstantSDNode>(N0.
getOperand(IsAdd ? 1 : 0));
16528// AddC needs to have at least 32 trailing zeros. 16532// All users should be a shift by constant less than or equal to 32. This 16533// ensures we'll do this optimization for each of them to produce an 16534// add/sub+sext_inreg they can all share. 16537 !isa<ConstantSDNode>(U->getOperand(1)) ||
16538 U->getConstantOperandVal(1) > 32)
  // Not an ADD or SUB.
  // Look for a shift left by 32.
  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
// Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
  if (!
Cond.hasOneUse())
  // Canonicalize setcc to LHS.
  // LHS should be a setcc and RHS should be an xor.
  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  // The LHS of the xor needs to be 0/1.
  // We can only invert integer setccs.
  // Invert (setlt 0, X) by converting to (setlt X, 1).
  // (setlt X, 1) by converting to (setlt 0, X).

// Perform common combines for BR_CC and SELECT_CC conditions.
  // Since an arithmetic right shift always preserves the sign,
  // the shift can be omitted:
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  //      setge (sra X, N), 0 -> setge X, 0
  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
      LHS.getOperand(0).getValueType() == Subtarget.
getXLenVT()) {
16668// If we're looking for eq 0 instead of ne 0, we need to invert the 16671 CCVal = cast<CondCodeSDNode>(
LHS.getOperand(2))->get();
16683// Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) 16690// Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) 16702 ShAmt =
LHS.getValueSizeInBits() - 1 - ShAmt;
  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.

// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
  bool Commutative = true;
  unsigned Opc = TrueVal.getOpcode();
    Commutative = false;
  if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
  if (FalseVal == TrueVal.getOperand(0))
  else if (Commutative && FalseVal == TrueVal.getOperand(1))
16771EVT VT =
N->getValueType(0);
16773SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16779assert(IdentityOperand &&
"No identity operand!");
16784 DAG.
getSelect(
DL, OtherOpVT,
N->getOperand(0), OtherOp, IdentityOperand);
16785return DAG.
getNode(TrueVal.getOpcode(),
DL, VT, FalseVal, NewSel);
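// Illustrative sketch (assumption): scalar form of the rewrite above. Folding
// the select into the binop's RHS is valid because the identity operand (0
// for add here) makes the "false" arm a no-op:
//   (C ? (Y + X) : Y) == Y + (C ? X : 0)
// The helper name is hypothetical.
static bool selectIntoOpHolds(bool C, int32_t X, int32_t Y) {
  int64_t Orig = C ? int64_t(Y) + int64_t(X) : int64_t(Y);
  int64_t Folded = int64_t(Y) + (C ? int64_t(X) : 0);
  return Orig == Folded;
}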
16788// This tries to get rid of `select` and `icmp` that are being used to handle 16789// `Targets` that do not support `cttz(0)`/`ctlz(0)`. 16793// This represents either CTTZ or CTLZ instruction. 16806 CountZeroes =
N->getOperand(2);
16807 ValOnZero =
N->getOperand(1);
16809 CountZeroes =
N->getOperand(1);
16810 ValOnZero =
N->getOperand(2);
16829if (
Cond->getOperand(0) != CountZeroesArgument)
16845 CountZeroes, BitWidthMinusOne);
16855EVT VT =
N->getValueType(0);
16856EVT CondVT =
Cond.getValueType();
16861// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate 16862// BEXTI, where C is power of 2. 16864 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16870constAPInt &MaskVal =
LHS.getConstantOperandAPInt(1);
16881if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
16895// Add is commutative, so check both orders 16896return ((TrueVal.getOperand(0) ==
A && TrueVal.getOperand(1) ==
B) ||
16897 (TrueVal.getOperand(1) ==
A && TrueVal.getOperand(0) ==
B));
16900/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b). 16901/// This allows us match a vadd.vv fed by a masked vrsub, which reduces 16902/// register pressure over the add followed by masked vsub sequence. 16905EVT VT =
N->getValueType(0);
16908SDValue FalseVal =
N->getOperand(2);
16914SDValue Sub = SwapCC ? TrueVal : FalseVal;
16918// Arrange the select such that we can match a masked 16919// vrsub.vi to perform the conditional negate 16939SDValue FalseVal =
N->getOperand(2);
/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ...).
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
  EVT VT =
N->getValueType(0);
16961constunsigned Opcode =
N->op_begin()->getNode()->getOpcode();
16968// This BUILD_VECTOR involves an implicit truncation, and sinking 16969// truncates through binops is non-trivial. 16977// We can't form a divide or remainder from undef. 16986// TODO: We can handle operations which have an neutral rhs value 16987// (e.g. x + 0, a * 1 or a << 0), but we then have to keep track 16988// of profit in a more explicit manner. 16989if (
Op.getOpcode() != Opcode || !
Op.hasOneUse())
16993if (!isa<ConstantSDNode>(
Op.getOperand(1)) &&
16994 !isa<ConstantFPSDNode>(
Op.getOperand(1)))
16996// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 16997// have different LHS and RHS types. 16998if (
Op.getOperand(0).getValueType() !=
Op.getOperand(1).getValueType())
17023// Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt 17024// move the insert_vector_elts into the arms of the binop. Note that 17025// the new RHS must be a constant. 17026constunsigned InVecOpcode = InVec->
getOpcode();
17036if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
17038// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 17039// have different LHS and RHS types. 17043 InVecLHS, InValLHS, EltNo);
17045 InVecRHS, InValRHS, EltNo);
17049// Given insert_vector_elt (concat_vectors ...), InVal, Elt 17050// move the insert_vector_elt to the source operand of the concat_vector. 17054auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17057unsigned Elt = IndexC->getZExtValue();
17065unsigned ConcatOpIdx = Elt / ConcatNumElts;
17068 ConcatOp, InVal, NewIdx);
17072 ConcatOps[ConcatOpIdx] = ConcatOp;
17076// If we're concatenating a series of vector loads like 17077// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 17078// Then we can turn this into a strided load by widening the vector elements 17079// vlse32 p, stride=n 17084EVT VT =
N->getValueType(0);
17086// Only perform this combine on legal MVTs. 17090// TODO: Potentially extend this to scalable vectors 17094auto *BaseLd = dyn_cast<LoadSDNode>(
N->getOperand(0));
17096 !
SDValue(BaseLd, 0).hasOneUse())
17099EVT BaseLdVT = BaseLd->getValueType(0);
17101// Go through the loads and check that they're strided 17106auto *Ld = dyn_cast<LoadSDNode>(
Op);
17107if (!Ld || !Ld->isSimple() || !
Op.hasOneUse() ||
17109 Ld->getValueType(0) != BaseLdVT)
17114// The common alignment is the most restrictive (smallest) of all the loads 17118usingPtrDiff = std::pair<std::variant<int64_t, SDValue>,
bool>;
17121// If the load ptrs can be decomposed into a common (Base + Index) with a 17122// common constant stride, then return the constant stride. 17125if (BIO1.equalBaseIndex(BIO2, DAG))
17128// Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) 17130SDValue P2 = Ld2->getBasePtr();
17133if (P1.getOpcode() ==
ISD::ADD && P1.getOperand(0) == P2)
17134return {{P1.getOperand(1),
true}};
17136return std::nullopt;
17139// Get the distance between the first and second loads 17140auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17144// Check all the loads are the same distance apart 17145for (
auto *It = Lds.
begin() + 1; It != Lds.
end() - 1; It++)
17146if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17149// TODO: At this point, we've successfully matched a generalized gather 17150// load. Maybe we should emit that, and then move the specialized 17151// matchers above and below into a DAG combine? 17153// Get the widened scalar type, e.g. v4i8 -> i64 17154unsigned WideScalarBitWidth =
17158// Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64 17163// Check that the operation is legal 17167auto [StrideVariant, MustNegateStride] = *BaseDiff;
17169 std::holds_alternative<SDValue>(StrideVariant)
17170 ? std::get<SDValue>(StrideVariant)
17173if (MustNegateStride)
17181if (
auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17182 ConstStride && ConstStride->getSExtValue() >= 0)
17183// total size = (elsize * n) + (stride - elsize) * (n-1) 17184// = elsize + stride * (n-1) 17186 ConstStride->getSExtValue() * (
N->getNumOperands() - 1);
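  // Illustrative sketch (assumption): checks the algebra in the comment
  // above,
  //   (elsize * n) + (stride - elsize) * (n - 1) == elsize + stride * (n - 1).
  // The helper name is hypothetical.
  static bool memSizeFormulaHolds(int64_t ElSize, int64_t Stride, int64_t N) {
    int64_t Expanded = ElSize * N + (Stride - ElSize) * (N - 1);
    int64_t Simplified = ElSize + Stride * (N - 1);
    return Expanded == Simplified;
  }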
17188// If Stride isn't constant, then we can't know how much it will load 17192 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17196 WideVecVT,
DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17210EVT VT =
N->getValueType(0);
17218// Recognized a disguised select of add/sub. 17222SDValue Sub = SwapCC ? V1 : V2;
17227for (
int MaskIndex : Mask) {
17228bool SelectMaskVal = (MaskIndex < (int)NumElts);
17231assert(MaskVals.
size() == NumElts &&
"Unexpected select-like shuffle");
17235// Arrange the select such that we can match a masked 17236// vrsub.vi to perform the conditional negate 17244// Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs 17245// during the combine phase before type legalization, and relies on 17246// DAGCombine not undoing the transform if isShuffleMaskLegal returns false 17247// for the source mask. 17269if (
N->getValueType(0).isFixedLengthVector())
17276SDValue AddPassthruOp =
N->getOperand(2);
17281auto IsVWMulOpc = [](
unsigned Opc) {
17310return std::make_pair(
N->getOperand(3),
N->getOperand(4));
17311 }(
N, DAG, Subtarget);
17316if (AddMask != MulMask || AddVL != MulVL)
17321"Unexpected opcode after VWMACC_VL");
17323"Unexpected opcode after VWMACC_VL!");
17325"Unexpected opcode after VWMUL_VL!");
17327"Unexpected opcode after VWMUL_VL!");
17330EVT VT =
N->getValueType(0);
17346constEVT IndexVT = Index.getValueType();
17348// RISC-V indexed loads only support the "unsigned unscaled" addressing 17349// mode, so anything else must be manually legalized. 17350if (!isIndexTypeSigned(IndexType))
17354// Any index legalization should first promote to XLenVT, so we don't lose 17355// bits when scaling. This may create an illegal index type so we let 17356// LLVM's legalization take care of the splitting. 17357// FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 17365/// Match the index vector of a scatter or gather node as the shuffle mask 17366/// which performs the rearrangement if possible. Will only match if 17367/// all lanes are touched, and thus replacing the scatter or gather with 17368/// a unit strided access and shuffle is legal. 17379// Create the shuffle mask and check all bits active 17382for (
unsigned i = 0; i < Index->getNumOperands(); i++) {
17383// TODO: We've found an active bit of UB, and could be 17384// more aggressive here if desired. 17385if (Index->getOperand(i)->isUndef())
17387uint64_tC = Index->getConstantOperandVal(i);
17388if (
C % ElementSize != 0)
17394 ActiveLanes.
set(
C);
17396return ActiveLanes.
all();
17399/// Match the index of a gather or scatter operation as an operation 17400/// with twice the element width and half the number of elements. This is 17401/// generally profitable (if legal) because these operations are linear 17402/// in VL, so even if we cause some extract VTYPE/VL toggles, we still 17411// Attempt a doubling. If we can use a element type 4x or 8x in 17412// size, this will happen via multiply iterations of the transform. 17414if (NumElems % 2 != 0)
17418constunsigned WiderElementSize = ElementSize * 2;
17419if (WiderElementSize > ST.getELen()/8)
17422if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17425for (
unsigned i = 0; i < Index->getNumOperands(); i++) {
17426// TODO: We've found an active bit of UB, and could be 17427// more aggressive here if desired. 17428if (Index->getOperand(i)->isUndef())
17430// TODO: This offset check is too strict if we support fully 17431// misaligned memory operations. 17432uint64_tC = Index->getConstantOperandVal(i);
17434if (
C % WiderElementSize != 0)
17439if (
C !=
Last + ElementSize)
// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This benefits the cases where X and Y are both low-precision vectors of the
// same value type. Since the truncate would be lowered into
// n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
// restriction, such a pattern would be expanded into a series of "vsetvli"
// and "vnsrl" instructions later to reach this point.
      (isa<RegisterSDNode>(VL) &&
17457 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17459 Mask.getOperand(0) != VL)
17462auto IsTruncNode = [&](
SDValue V) {
17464 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17469// We need to first find the inner level of TRUNCATE_VECTOR_VL node 17470// to distinguish such pattern. 17471while (IsTruncNode(
Op)) {
17472if (!
Op.hasOneUse())
17474Op =
Op.getOperand(0);
// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
// maximum value for the truncated type.
// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
// is the signed maximum value for the truncated type and C2 is the signed
// minimum value for the truncated type.
  MVT VT =
N->getSimpleValueType(0);
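  // Illustrative sketch (assumption): scalar model of the saturation patterns
  // matched below by DetectUSatPattern / DetectSSatPattern, for a 16-bit to
  // 8-bit truncate. Assumes std::min/std::max are available through the
  // existing includes; the helper names are hypothetical.
  static uint8_t modelUSatTrunc(uint16_t X) {
    return uint8_t(std::min<uint16_t>(X, 255)); // umin with UNSIGNED_MAX, then truncate
  }
  static int8_t modelSSatTrunc(int16_t X) {
    int16_t Clamped = std::min<int16_t>(std::max<int16_t>(X, -128), 127);
    return int8_t(Clamped); // smax/smin with SIGNED_MIN/MAX, then truncate
  }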
17514auto MatchMinMax = [&VL, &Mask](
SDValue V,
unsigned Opc,
unsigned OpcVL,
17516if (V.getOpcode() != Opc &&
17517 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17518 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17523// Peek through conversion between fixed and scalable vectors. 17526Op.getOperand(1).getValueType().isFixedLengthVector() &&
17528Op.getOperand(1).getOperand(0).getValueType() ==
Op.getValueType() &&
17530Op =
Op.getOperand(1).getOperand(0);
17533return V.getOperand(0);
17536Op.getOperand(2) == VL) {
17537if (
auto *Op1 = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
17539 Op1->getAPIntValue().sextOrTrunc(
Op.getScalarValueSizeInBits());
17540return V.getOperand(0);
17549auto DetectUSatPattern = [&](
SDValue V) {
17552// Simple case, V is a UMIN. 17557// If we have an SMAX that removes negative numbers first, then we can match 17558// SMIN instead of UMIN. 17565// If we have an SMIN before an SMAX and the SMAX constant is less than or 17566// equal to the SMIN constant, we can use vnclipu if we insert a new SMAX 17574 V.getOperand(1), DAG.
getUNDEF(V.getValueType()),
17580auto DetectSSatPattern = [&](
SDValue V) {
17582unsigned NumSrcBits = V.getScalarValueSizeInBits();
17590if (HiC == SignedMax && LoC == SignedMin)
17596if (HiC == SignedMax && LoC == SignedMin)
17604// Look through multiple layers of truncates. 17606 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17608 Src = Src.getOperand(0);
17612if ((Val = DetectUSatPattern(Src)))
17614elseif ((Val = DetectSSatPattern(Src)))
17624 Val = DAG.
getNode(ClipOpc,
DL, ValVT, Val, Mask, VL);
17625 }
while (ValVT != VT);
17631// (iX ctpop (bitcast (vXi1 A))) 17633// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A))))) 17634// FIXME: It's complicated to match all the variations of this after type 17635// legalization so we only handle the pre-type legalization pattern, but that 17636// requires the fixed vector type to be legal. 17639EVT VT =
N->getValueType(0);
17645// Peek through zero_extend. It doesn't change the count. 17647 Src = Src.getOperand(0);
17652 Src = Src.getOperand(0);
17653EVT SrcEVT = Src.getValueType();
17658// Make sure the input is an i1 vector. 17682// Helper to call SimplifyDemandedBits on an operand of N where only some low 17683// bits are demanded. N will be added to the Worklist if it was not deleted. 17684// Caller should return SDValue(N, 0) if this returns true. 17685auto SimplifyDemandedLowBitsHelper = [&](
unsigned OpNo,
unsigned LowBits) {
17696switch (
N->getOpcode()) {
17701// If the input to SplitF64 is just BuildPairF64 then the operation is 17702// redundant. Instead, use BuildPairF64's operands directly. 17712// It's cheaper to materialise two 32-bit integers than to load a double 17713// from the constant pool and transfer it to integer registers through the 17716APInt V =
C->getValueAPF().bitcastToAPInt();
17722// This is a target-specific version of a DAGCombine performed in 17723// DAGCombiner::visitBITCAST. It performs the equivalent of: 17724// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 17725// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 17750// Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 17751if (SimplifyDemandedLowBitsHelper(0, 32) ||
17752 SimplifyDemandedLowBitsHelper(1, 5))
17759// Only the lower 32 bits of the first operand are read 17760if (SimplifyDemandedLowBitsHelper(0, 32))
17765// If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the 17766// conversion is unnecessary and can be replaced with the 17767// FMV_X_ANYEXTW_RV64 operand. 17777MVT VT =
N->getSimpleValueType(0);
17780if (
auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17785// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 17786// conversion is unnecessary and can be replaced with the FMV_W_X_RV64 17787// operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 17793"Unexpected value type!");
17798 cast<LoadSDNode>(Op0)->isSimple()) {
17800auto *LN0 = cast<LoadSDNode>(Op0);
17803 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17808// This is a target-specific version of a DAGCombine performed in 17809// DAGCombiner::visitBITCAST. It performs the equivalent of: 17810// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 17811// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 17827EVT VT =
N->getValueType(0);
17829// abs (sext) -> zext (abs) 17830// abs (zext) -> zext (handled elsewhere) 17872// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y 17880if (!
C || !
C->getValueAPF().isExactlyValue(+1.0))
17882EVT VT =
N->getValueType(0);
17908// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 17909// type legalization. This is safe because fp_to_uint produces poison if 17911if (
N->getValueType(0) == MVT::i64 && Subtarget.
is64Bit()) {
17916 Src.getOperand(0));
17921 Src.getOperand(0), Src.getOperand(1));
17925returnSDValue(
N, 0);
// Return N so it doesn't get rechecked. 17944unsigned Opc =
N->getOpcode();
17946// czero_eqz x, x -> x 17953// czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1. 17954// czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1. 17959return DAG.
getNode(InvOpc,
SDLoc(
N),
N->getValueType(0), Val, NewCond);
17961// czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y 17962// czero_nez x, (setcc y, 0, ne) -> czero_nez x, y 17963// czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y 17964// czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y 17969N->getValueType(0), Val,
Cond.getOperand(0));
17982EVT VT =
N->getValueType(0);
17984// If the True and False values are the same, we don't need a select_cc. 17985if (TrueV == FalseV)
17988// (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 17989// (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 17990if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17996 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17997 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17998// Only handle simm12, if it is not in this range, it can be considered as 18000if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18001 isInt<12>(TrueSImm - FalseSImm)) {
18017 {LHS, RHS, CC, TrueV, FalseV});
18020// (select c, -1, y) -> -c | y 18026// (select c, y, -1) -> -!c | y 18034// (select c, 0, y) -> -!c & y 18041// (select c, y, 0) -> -c & y 18047// (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) 18048// (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) 18054// freeze it to be safe. 18061// If both true/false are an xor with 1, pull through the select. 18062// This can occur after op legalization if both operands are setccs that 18063// require an xor to invert. 18064// FIXME: Generalize to other binary ops with identical operand? 18084N->getOperand(0),
LHS,
RHS,
CC,
N->getOperand(4));
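// Illustrative sketch (assumption): with c restricted to 0 or 1, -c is either
// 0 or all-ones, which is what the select-to-bitwise rewrites noted above
// rely on. The helper name is hypothetical.
static bool selectToMaskHolds(bool C, uint64_t Y) {
  uint64_t NegC = uint64_t(0) - uint64_t(C);     // 0 or ~0
  uint64_t NegNotC = uint64_t(0) - uint64_t(!C); // ~0 or 0
  bool Case1 = (C ? ~uint64_t(0) : Y) == (NegC | Y);   // (select c, -1, y) -> -c | y
  bool Case2 = (C ? Y : uint64_t(0)) == (NegC & Y);    // (select c, y, 0) -> -c & y
  bool Case3 = (C ? uint64_t(0) : Y) == (NegNotC & Y); // (select c, 0, y) -> -!c & y
  return Case1 && Case2 && Case3;
}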
18097EVT VT =
N->getValueType(0);
18100// There is a form of VFSGNJ which injects the negated sign of its second 18101// operand. Try and bubble any FNEG up after the extend/round to produce 18102// this optimized pattern. Avoid modifying cases where FP_ROUND and 18105// Avoid cases where the extend/round has multiple uses, as duplicating 18106// those is typically more expensive than removing a fneg. 18121constauto *MGN = cast<MaskedGatherSDNode>(
N);
18122constEVT VT =
N->getValueType(0);
18123SDValue Index = MGN->getIndex();
18124SDValue ScaleOp = MGN->getScale();
18126assert(!MGN->isIndexScaled() &&
18127"Scaled gather/scatter should not be formed");
18132N->getVTList(), MGN->getMemoryVT(),
DL,
18133 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18134 MGN->getBasePtr(), Index, ScaleOp},
18135 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18139N->getVTList(), MGN->getMemoryVT(),
DL,
18140 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18141 MGN->getBasePtr(), Index, ScaleOp},
18142 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18146// The sequence will be XLenVT, not the type of Index. Tell 18147// isSimpleVIDSequence this so we avoid overflow. 18148if (std::optional<VIDSequence> SimpleVID =
18150 SimpleVID && SimpleVID->StepDenominator == 1) {
18151const int64_t StepNumerator = SimpleVID->StepNumerator;
18152const int64_t Addend = SimpleVID->Addend;
      const int64_t Addend = SimpleVID->Addend;
      // Note: We don't need to check alignment here since (by assumption
      // from the existence of the gather), our offsets must be sufficiently
      // aligned.
      assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18167 VT,
DL, MGN->getChain(), BasePtr,
18169 EVL, MGN->getMemOperand());
18171 StridedLoad, MGN->getPassThru(), EVL);
18181 MGN->getBasePtr(), DAG.
getUNDEF(XLenVT),
18183 MGN->getMemoryVT(), MGN->getMemOperand(),
18192 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18194for (
unsigned i = 0; i < Index->getNumOperands(); i += 2)
18195 NewIndices.
push_back(Index.getOperand(i));
18196EVT IndexVT = Index.getValueType()
18197 .getHalfNumVectorElementsVT(*DAG.
getContext());
18203assert(EltCnt.isKnownEven() &&
"Splitting vector, but not in half!");
18205 EltCnt.divideCoefficientBy(2));
18208 EltCnt.divideCoefficientBy(2));
18213 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18222constauto *MSN = cast<MaskedScatterSDNode>(
N);
18223SDValue Index = MSN->getIndex();
18224SDValue ScaleOp = MSN->getScale();
18226assert(!MSN->isIndexScaled() &&
18227"Scaled gather/scatter should not be formed");
18232N->getVTList(), MSN->getMemoryVT(),
DL,
18233 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18235 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18239N->getVTList(), MSN->getMemoryVT(),
DL,
18240 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18242 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18244EVT VT = MSN->getValue()->getValueType(0);
18246if (!MSN->isTruncatingStore() &&
18251 DAG.
getUNDEF(XLenVT), MSN->getMask(),
18252 MSN->getMemoryVT(), MSN->getMemOperand(),
18257case ISD::VP_GATHER: {
18258constauto *VPGN = cast<VPGatherSDNode>(
N);
18259SDValue Index = VPGN->getIndex();
18260SDValue ScaleOp = VPGN->getScale();
18262assert(!VPGN->isIndexScaled() &&
18263"Scaled gather/scatter should not be formed");
18268 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18269 ScaleOp, VPGN->getMask(),
18270 VPGN->getVectorLength()},
18271 VPGN->getMemOperand(), IndexType);
18275 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18276 ScaleOp, VPGN->getMask(),
18277 VPGN->getVectorLength()},
18278 VPGN->getMemOperand(), IndexType);
18282case ISD::VP_SCATTER: {
18283constauto *VPSN = cast<VPScatterSDNode>(
N);
18284SDValue Index = VPSN->getIndex();
18285SDValue ScaleOp = VPSN->getScale();
18287assert(!VPSN->isIndexScaled() &&
18288"Scaled gather/scatter should not be formed");
18293 {VPSN->getChain(), VPSN->getValue(),
18294 VPSN->getBasePtr(), Index, ScaleOp,
18295 VPSN->getMask(), VPSN->getVectorLength()},
18296 VPSN->getMemOperand(), IndexType);
18300 {VPSN->getChain(), VPSN->getValue(),
18301 VPSN->getBasePtr(), Index, ScaleOp,
18302 VPSN->getMask(), VPSN->getVectorLength()},
18303 VPSN->getMemOperand(), IndexType);
18314// We don't need the upper 32 bits of a 64-bit element for a shift amount. 18317EVT VT =
N->getValueType(0);
18320return DAG.
getNode(
N->getOpcode(),
DL, VT,
N->getOperand(0), ShAmt,
18321N->getOperand(2),
N->getOperand(3),
N->getOperand(4));
18337// We don't need the upper 32 bits of a 64-bit element for a shift amount. 18339EVT VT =
N->getValueType(0);
18343return DAG.
getNode(
N->getOpcode(),
DL, VT,
N->getOperand(0), ShAmt);
18383auto *Store = cast<StoreSDNode>(
N);
18384SDValue Chain = Store->getChain();
18385EVT MemVT = Store->getMemoryVT();
18386SDValue Val = Store->getValue();
18389bool IsScalarizable =
18391 Store->isSimple() &&
18396// If sufficiently aligned we can scalarize stores of constant vectors of 18397// any power-of-two size up to XLen bits, provided that they aren't too 18398// expensive to materialize. 18399// vsetivli zero, 2, e8, m1, ta, ma 18407// Get the constant vector bits 18421 NewVT, *Store->getMemOperand())) {
18423return DAG.
getStore(Chain,
DL, NewV, Store->getBasePtr(),
18424 Store->getPointerInfo(), Store->getOriginalAlign(),
18425 Store->getMemOperand()->getFlags());
18429// Similarly, if sufficiently aligned we can scalarize vector copies, e.g. 18430// vsetivli zero, 2, e16, m1, ta, ma 18433if (
auto *L = dyn_cast<LoadSDNode>(Val);
18435 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18437 L->getMemoryVT() == MemVT) {
18440 NewVT, *Store->getMemOperand()) &&
18442 NewVT, *L->getMemOperand())) {
18444 L->getPointerInfo(), L->getOriginalAlign(),
18445 L->getMemOperand()->getFlags());
18446return DAG.
getStore(Chain,
DL, NewL, Store->getBasePtr(),
18447 Store->getPointerInfo(), Store->getOriginalAlign(),
18448 Store->getMemOperand()->getFlags());
18452// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 18453// vfmv.f.s is represented as extract element from 0. Match it late to avoid 18454// any illegal types. 18460MVT VecVT = Src.getSimpleValueType();
18461// VecVT should be scalable and memory VT should match the element type. 18467 Store->getChain(),
DL, Src, Store->getBasePtr(), Store->getOffset(),
18470 Store->getMemOperand(), Store->getAddressingMode(),
18471 Store->isTruncatingStore(),
/*IsCompress*/false);
18478EVT VT =
N->getValueType(0);
18479// Only perform this combine on legal MVT types. 18504constMVT VT =
N->getSimpleValueType(0);
18505SDValue Passthru =
N->getOperand(0);
18509// If VL is 1, we can use vfmv.s.f. 18515constMVT VT =
N->getSimpleValueType(0);
18516SDValue Passthru =
N->getOperand(0);
18520// Tail agnostic VMV.V.X only demands the vector element bitwidth from the 18522unsigned ScalarSize = Scalar.getValueSizeInBits();
18524if (ScalarSize > EltWidth && Passthru.
isUndef())
18525if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18528// If VL is 1 and the scalar value won't benefit from immediate, we can 18532 (!Const || Const->isZero() ||
18533 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18540// Try to remove vector->scalar->vector if the scalar->vector is inserting 18541// into an undef vector. 18542// TODO: Could use a vslide or vmv.v.v for non-undef. 18543if (
N->getOperand(0).isUndef() &&
18546 Src.getOperand(0).getValueType().isScalableVector()) {
18547EVT VT =
N->getValueType(0);
18548EVT SrcVT = Src.getOperand(0).getValueType();
18550// Widths match, just return the original vector. 18552return Src.getOperand(0);
18553// TODO: Use insert_subvector/extract_subvector to change widen/narrow? 18558constMVT VT =
N->getSimpleValueType(0);
18559SDValue Passthru =
N->getOperand(0);
18564 Scalar.getOperand(0).getValueType() ==
N->getValueType(0))
18565return Scalar.getOperand(0);
18567// Use M1 or smaller to avoid over constraining register allocation 18574 DAG.
getNode(
N->getOpcode(),
DL, M1VT, M1Passthru, Scalar, VL);
18580// We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or 18581// higher would involve overly constraining the register allocator for 18584 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18592MVT VecVT =
N->getOperand(0).getSimpleValueType();
18594if (M1VT.
bitsLT(VecVT)) {
18605unsigned IntNo =
N->getConstantOperandVal(IntOpNo);
18607// By default we do not combine any intrinsic. 18610case Intrinsic::riscv_vcpop:
18611case Intrinsic::riscv_vcpop_mask:
18612case Intrinsic::riscv_vfirst:
18613case Intrinsic::riscv_vfirst_mask: {
18615if (IntNo == Intrinsic::riscv_vcpop_mask ||
18616 IntNo == Intrinsic::riscv_vfirst_mask)
18617 VL =
N->getOperand(3);
18620// If VL is 0, vcpop -> li 0, vfirst -> li -1. 18622EVT VT =
N->getValueType(0);
18623if (IntNo == Intrinsic::riscv_vfirst ||
18624 IntNo == Intrinsic::riscv_vfirst_mask)
18630case ISD::EXPERIMENTAL_VP_REVERSE:
18637EVT VT =
N->getValueType(0);
18648for (
unsigned i = 0; i < NF; ++i)
18653// If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer 18654// type, widen both sides to avoid a trip through memory. 18655if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18678EVT XVT,
unsigned KeptBits)
const{
18679// For vectors, we don't have a preference.. 18683if (XVT != MVT::i32 && XVT != MVT::i64)
18686// We can use sext.w for RV64 or an srai 31 on RV32. 18687if (KeptBits == 32 || KeptBits == 64)
18690// With Zbb we can use sext.h/sext.b. 18691return Subtarget.hasStdExtZbb() &&
18692 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.
is64Bit()) ||
18700"Expected shift op");
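// Illustrative sketch (assumption): the folds listed above rely on a left
// shift distributing over add/or with a constant, i.e.
//   (x + c1) << c2 == (x << c2) + (c1 << c2)   (modulo 2^64)
//   (x | c1) << c2 == (x << c2) | (c1 << c2)
// The helper name is hypothetical.
static bool shlFoldHolds(uint64_t X, uint64_t C1, unsigned C2) {
  C2 &= 63; // keep the shift amount in range
  bool AddOk = ((X + C1) << C2) == ((X << C2) + (C1 << C2));
  bool OrOk = ((X | C1) << C2) == ((X << C2) | (C1 << C2));
  return AddOk && OrOk;
}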
18702// The following folds are only desirable if `(OP _, c1 << c2)` can be 18703// materialised in fewer instructions than `(OP _, c1)`: 18705// (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 18706// (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 18710// LD/ST will optimize constant Offset extraction, so when AddNode is used by 18711// LD/ST, it can still complete the folding optimization operation performed 18715// This use is the one we're on right now. Skip it 18718if (!isa<StoreSDNode>(
Use) && !isa<LoadSDNode>(
Use))
18727return isUsedByLdSt(N0.
getNode(),
N);
18729auto *C1 = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
18730auto *C2 = dyn_cast<ConstantSDNode>(
N->getOperand(1));
18732// Bail if we might break a sh{1,2,3}add pattern. 18733if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18734 C2->getZExtValue() <= 3 &&
N->hasOneUse() &&
18735N->user_begin()->getOpcode() ==
ISD::ADD &&
18736 !isUsedByLdSt(*
N->user_begin(),
nullptr) &&
18737 !isa<ConstantSDNode>(
N->user_begin()->getOperand(1)))
18741constAPInt &C1Int = C1->getAPIntValue();
18742APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18744// We can materialise `c1 << c2` into an add immediate, so it's "free", 18745// and the combine should happen, to potentially allow further combines 18751// We can materialise `c1` in an add immediate, so it's "free", and the 18752// combine should be prevented. 18757// Neither constant will fit into an immediate, so find materialisation 18761/*CompressionCost*/true);
18764/*CompressionCost*/true);
18766// Materialising `c1` is cheaper than materialising `c1 << c2`, so the 18767// combine should be prevented. 18768if (C1Cost < ShiftedC1Cost)
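// Illustrative sketch (not part of the combine itself): the algebraic identity
// behind commuting the shift, and the cost question it raises. The identity
// always holds; the combine is only worthwhile when `c1 << c2` is no more
// expensive to materialise than `c1`. Helper names are hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static bool fitsSImm12(int64_t V) { return V >= -2048 && V <= 2047; }

static void commuteShlAddSketch() {
  uint64_t X = 5, C1 = 0x70, C2 = 4;
  // (shl (add x, c1), c2) == (add (shl x, c2), c1 << c2)
  assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
  // Here c1 << c2 = 0x700 still fits in a 12-bit signed immediate, so the
  // commuted form needs no extra constant materialisation.
  assert(fitsSImm12(int64_t(C1 << C2)));
}
#endif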
  // Delay this optimization as late as possible.
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.

  // Try to make a smaller immediate by setting undemanded bits.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
                      Op.getOperand(0), NewC);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.

  // And has a few special cases for zext.

  // Preserve (and X, 0xffff), if zext.h exists use zext.h,
  // otherwise use SLLI + SRLI.
  APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
  if (IsLegalMask(NewMask))
    return UseMask(NewMask);

  // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
  if (VT == MVT::i64) {
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.

  // What is the fewest number of bits we need to represent the negative number.

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
  else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
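// Illustrative sketch (not part of the file's logic): the legality rule used
// above, restated with plain 64-bit integers. A replacement mask is acceptable
// only if it keeps every bit of the shrunk (demanded) mask and adds nothing
// outside the expanded mask, i.e. ShrunkMask is a subset of NewMask and
// NewMask is a subset of ExpandedMask. Helper names are hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static bool isLegalMaskSketch(uint64_t ShrunkMask, uint64_t ExpandedMask,
                              uint64_t NewMask) {
  bool KeepsDemandedOnes = (ShrunkMask & ~NewMask) == 0;
  bool AddsOnlyUndemanded = (NewMask & ~ExpandedMask) == 0;
  return KeepsDemandedOnes && AddsOnlyUndemanded;
}

static void shrinkDemandedConstantSketch() {
  // Only the low 16 bits are demanded and the constant is 0x1234ffff.
  uint64_t Demanded = 0xffff, C = 0x1234ffffULL;
  uint64_t Shrunk = C & Demanded;    // 0xffff: non-demanded bits cleared.
  uint64_t Expanded = C | ~Demanded; // demanded bits of C plus all free bits.
  // 0xffff is a legal replacement, and it is cheap (zext.h or SLLI+SRLI).
  assert(isLegalMaskSketch(Shrunk, Expanded, 0xffff));
}
#endif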
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1 << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = GREVMasks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
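// Illustrative sketch (not part of the lowering): a standalone version of the
// staged bit-permutation above. With a control value of 7 (stages 1, 2 and 4
// active) it performs brev8, i.e. reverses the bits within each byte. The
// function name is hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static uint64_t grevSketch(uint64_t X, unsigned ShAmt) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = Masks[Stage];
      X = ((X & Mask) << Shift) | ((X >> Shift) & Mask);
    }
  }
  return X;
}

static void brev8Sketch() {
  // 0b00000001 reversed within its byte is 0b10000000.
  assert(grevSketch(0x01, 7) == 0x80);
}
#endif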
                                                   const APInt &DemandedElts,
                                                   unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

    // If we don't know any bits, early out.

    // Only known if known in both the LHS and RHS.

    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.

    // We only care about the lower 32 bits.
    // Restore the original width by sign extending.

    // We only care about the lower 32 bits.
    // Restore the original width by sign extending.

    // Restore the original width by sign extending.

    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.

    // To compute zeros, we need to invert the value and invert it back after.

    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    if (MaxVLenB == MinVLenB)

    // fclass will only set one of the low 10 bits.

    // We can't do anything for most intrinsics.
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
      unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
      MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;

      // Result of vsetvli must be not larger than AVL.
      if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
        MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));

      unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
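// Illustrative sketch (not part of the lowering): how an upper bound on the
// result of vsetvli yields known zero bits. VLMAX = (VLEN / SEW) * LMUL, and
// every bit from position Log2(MaxVL) + 1 upward must be zero. Helper names
// are hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static unsigned log2Floor(uint64_t V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

static void vsetvliKnownBitsSketch() {
  uint64_t VLEN = 128, SEW = 32, LMUL = 2;
  uint64_t MaxVL = (VLEN / SEW) * LMUL;              // VLMAX = 8 here.
  unsigned KnownZeroFirstBit = log2Floor(MaxVL) + 1; // bits [4, XLEN) are zero
  assert(MaxVL == 8 && KnownZeroFirstBit == 4);
}
#endif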
                                                        unsigned Depth) const {
  switch (Op.getOpcode()) {
    if (Tmp == 1)
      return 1; // Early out.
    return std::min(Tmp, Tmp2);

    // Output is either all zero or operand 0. We can propagate sign bit count

    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    if (Tmp < 33)
      return 1;

    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.

    // The number of sign bits of the scalar result is computed by obtaining
    // the element type of the input vector operand, subtracting its width
    // from the XLEN, and then adding one (sign bit within the element type).
    // If the element type is wider than XLen, the least-significant XLEN bits
    // are taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;

    unsigned IntNo = Op.getConstantOperandVal(1);
    case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
    case Intrinsic::riscv_masked_atomicrmw_add_i64:
    case Intrinsic::riscv_masked_atomicrmw_sub_i64:
    case Intrinsic::riscv_masked_atomicrmw_nand_i64:
    case Intrinsic::riscv_masked_atomicrmw_max_i64:
    case Intrinsic::riscv_masked_atomicrmw_min_i64:
    case Intrinsic::riscv_masked_atomicrmw_umax_i64:
    case Intrinsic::riscv_masked_atomicrmw_umin_i64:
    case Intrinsic::riscv_masked_cmpxchg_i64:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.hasStdExtA());
  // TODO: Add more target nodes.
  switch (Op.getOpcode()) {
    // Integer select_cc cannot create poison.
    // TODO: What are the FP poison semantics?
    // TODO: This instruction blocks poison from the unselected operand, can
    // we do anything with that?
    return !Op.getValueType().isInteger();

  assert(Ld && "Unexpected null LoadSDNode");

  // Only constant pools with no offset are supported.
    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
    if (!CNode || CNode->isMachineConstantPoolEntry() ||
        CNode->getOffset() != 0)

  // Simple case, LLA.
    auto *CNode = GetSupportedConstantPool(Ptr);
    if (!CNode || CNode->getTargetFlags() != 0)

    return CNode->getConstVal();

  // Look for a HI and ADD_LO pair.
    auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
    auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));

    if (CNodeLo->getConstVal() != CNodeHi->getConstVal())

    return CNodeLo->getConstVal();
  assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");

  // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  //
  //   csrrs x3, counterh # load high word of counter
  //   csrrs x2, counter  # load low word of counter
  //   csrrs x4, counterh # load high word of counter
  //   bne x3, x4, read   # check if high word reads match, otherwise try again

  // Transfer the remainder of BB and its successor edges to DoneMBB.

  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);

  int64_t LoCounter = MI.getOperand(2).getImm();
  int64_t HiCounter = MI.getOperand(3).getImm();

  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)

  MI.eraseFromParent();
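// Illustrative sketch (not part of the emitted MIR): the retry loop above,
// written as plain C++. readCounterLo/readCounterHi stand in for the csrrs
// reads and are hypothetical.
#if 0
#include <cstdint>

extern uint32_t readCounterLo(); // hypothetical: csrrs of the low CSR
extern uint32_t readCounterHi(); // hypothetical: csrrs of the high CSR

static uint64_t readCounter64On32BitTarget() {
  uint32_t Hi, Lo, HiAgain;
  do {
    Hi = readCounterHi();
    Lo = readCounterLo();
    HiAgain = readCounterHi();
    // If the high half changed between the two reads, the low half wrapped
    // while we were reading; try again.
  } while (Hi != HiAgain);
  return (uint64_t(Hi) << 32) | Lo;
}
#endif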
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MI.eraseFromParent(); // The pseudo instruction is gone now.

  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MI.eraseFromParent(); // The pseudo instruction is gone now.

  switch (MI.getOpcode()) {
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_GPR_Using_CC_Imm:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:

                            unsigned RelOpcode, unsigned EqOpcode,

  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the current FFLAGS.

  // Restore the FFLAGS.

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
  //
  // Without this, custom-inserter would have generated:
  //
  //   A: X = ...; Y = ...
  //   C: Z = PHI [X, A], [Y, B]
  //   E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A: X = ...; Y = ...
  //   E: PHI [X, A], [X, C], [Y, D]

  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  // Fallthrough block for ThisMBB.
  // Fallthrough block for FirstMBB.
  // This is fallthrough.
  // Insert appropriate branch.
  // Insert appropriate branch.
  // Now remove the Select_FPRX_s.
  First.eraseFromParent();

// To "insert" Select_* instructions, we actually have to insert the triangle
// control-flow pattern. The incoming instructions know the destination vreg
// to set, the condition code register to branch on, the true/false values to
// select between, and the condcode to use to select the appropriate branch.
//
// We produce the following control flow:
//
// When we find a sequence of selects we attempt to optimize their emission
// by sharing the control flow. Currently we only handle cases where we have
// multiple selects with the exact same condition (same LHS, RHS and CC).
// The selects may be interleaved with other instructions if the other
// instructions meet some requirements we deem safe:
// - They are not pseudo instructions.
// - They are debug instructions. Otherwise,
// - They do not have side-effects, do not access memory and their inputs do
//   not depend on the results of the select pseudo-instructions.
// The TrueV/FalseV operands of the selects cannot depend on the result of
// previous selects in the sequence.
// These conditions could be further relaxed. See the X86 target for a
// related approach and more information.
//
// Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
// is checked here and handled by a separate function -
// EmitLoweredCascadedSelect.

  if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
       MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
      Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill())

  if (MI.getOperand(2).isReg())
    RHS = MI.getOperand(2).getReg();

  SelectDests.insert(MI.getOperand(0).getReg());

       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
    if (SequenceMBBI->getOperand(1).getReg() != LHS ||
        !SequenceMBBI->getOperand(2).isReg() ||
        SequenceMBBI->getOperand(2).getReg() != RHS ||
        SequenceMBBI->getOperand(3).getImm() != CC ||
        SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
        SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
    LastSelectPseudo = &*SequenceMBBI;
    SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);

  // Transfer debug instructions associated with the selects to TailMBB.
    TailMBB->push_back(DebugInstr->removeFromParent());

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,

  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.

  // Set the successors for HeadMBB.

  // Insert appropriate branch.
  if (MI.getOperand(2).isImm())
        .addImm(MI.getOperand(2).getImm())

  // IfFalseMBB just falls through to TailMBB.

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
            TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
        .addReg(SelectMBBI->getOperand(4).getReg())
        .addReg(SelectMBBI->getOperand(5).getReg())
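// Illustrative sketch (not part of the lowering): several selects that test
// the same condition are emitted with a single shared branch, analogous to
// rewriting independent ternaries over one predicate as a single if/else.
#if 0
#include <cassert>

static void sharedConditionSelectSketch(bool C) {
  int X1 = 1, Y1 = 2, X2 = 3, Y2 = 4;
  // Two selects on the same condition...
  int A = C ? X1 : Y1;
  int B = C ? X2 : Y2;
  // ...behave exactly like one branch that assigns both results.
  int A2, B2;
  if (C) {
    A2 = X1;
    B2 = X2;
  } else {
    A2 = Y1;
    B2 = Y2;
  }
  assert(A == A2 && B == B2);
}
#endif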
// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
      RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
  assert(Masked && "Could not find masked instruction for LMUL and SEW pair");

  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the old value of FFLAGS.
      .add(MI.getOperand(1))
      .add(MI.getOperand(2))
      .add(MI.getOperand(3))
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))

  // There is no E8 variant for VFCVT_F_X.
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .add(MI.getOperand(3))
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
  switch (MI.getOpcode()) {
  case RISCV::PseudoFROUND_H:
    CmpOpc = RISCV::FLT_H;
    F2IOpc = RISCV::FCVT_W_H;
    I2FOpc = RISCV::FCVT_H_W;
    FSGNJOpc = RISCV::FSGNJ_H;
    FSGNJXOpc = RISCV::FSGNJX_H;
    RC = &RISCV::FPR16RegClass;
  case RISCV::PseudoFROUND_H_INX:
    CmpOpc = RISCV::FLT_H_INX;
    F2IOpc = RISCV::FCVT_W_H_INX;
    I2FOpc = RISCV::FCVT_H_W_INX;
    FSGNJOpc = RISCV::FSGNJ_H_INX;
    FSGNJXOpc = RISCV::FSGNJX_H_INX;
    RC = &RISCV::GPRF16RegClass;
  case RISCV::PseudoFROUND_S:
    CmpOpc = RISCV::FLT_S;
    F2IOpc = RISCV::FCVT_W_S;
    I2FOpc = RISCV::FCVT_S_W;
    FSGNJOpc = RISCV::FSGNJ_S;
    FSGNJXOpc = RISCV::FSGNJX_S;
    RC = &RISCV::FPR32RegClass;
  case RISCV::PseudoFROUND_S_INX:
    CmpOpc = RISCV::FLT_S_INX;
    F2IOpc = RISCV::FCVT_W_S_INX;
    I2FOpc = RISCV::FCVT_S_W_INX;
    FSGNJOpc = RISCV::FSGNJ_S_INX;
    FSGNJXOpc = RISCV::FSGNJX_S_INX;
    RC = &RISCV::GPRF32RegClass;
  case RISCV::PseudoFROUND_D:
    CmpOpc = RISCV::FLT_D;
    F2IOpc = RISCV::FCVT_L_D;
    I2FOpc = RISCV::FCVT_D_L;
    FSGNJOpc = RISCV::FSGNJ_D;
    FSGNJXOpc = RISCV::FSGNJX_D;
    RC = &RISCV::FPR64RegClass;
  case RISCV::PseudoFROUND_D_INX:
    CmpOpc = RISCV::FLT_D_INX;
    F2IOpc = RISCV::FCVT_L_D_INX;
    I2FOpc = RISCV::FCVT_D_L_INX;
    FSGNJOpc = RISCV::FSGNJ_D_INX;
    FSGNJXOpc = RISCV::FSGNJX_D_INX;
    RC = &RISCV::GPRRegClass;

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);

  // Move all instructions after the sequence to DoneMBB.

  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.

  // Set the successors for MBB.

  int64_t FRM = MI.getOperand(3).getImm();

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Convert back to FP.

  // Restore the sign bit.

  // Merge the results.

  MI.eraseFromParent();
  switch (MI.getOpcode()) {
  case RISCV::ReadCounterWide:
           "ReadCounterWide is only to be used on riscv32");
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_GPR_Using_CC_Imm:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
  case RISCV::BuildPairF64Pseudo:
  case RISCV::SplitF64Pseudo:
  case RISCV::PseudoQuietFLE_H:
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
  case RISCV::PseudoQuietFLT_D:
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
  case RISCV::PROBED_STACKALLOC_DYN:
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while jal call instruction (where statepoint will be lowered at the end)
    // has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    MI.addOperand(*MI.getMF(),
                  MachineOperand::CreateReg(
                      RISCV::X1, /*isDef*/ true,
                      /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                      /*isUndef*/ false, /*isEarlyClobber*/ true));
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
             "supported on 64-bit targets");

  // Add FRM dependency to any instructions with dynamic rounding mode.

  // Vector pseudos have FRM index indicated by TSFlags.

  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
void RISCVTargetLowering::analyzeInputArgs(
  unsigned NumArgs = Ins.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    Type *ArgTy = nullptr;
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
                 /*IsFixed=*/true, IsRet, ArgTy)) {

void RISCVTargetLowering::analyzeOutputArgs(
  unsigned NumArgs = Outs.size();
  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
                 Outs[i].IsFixed, IsRet, OrigTy)) {
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.

  // The caller is responsible for loading the full value if the argument is
  // passed with CCValAssign::Indirect.

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    // An input zero extended from i31 can also be considered sign extended.
    if ((BitWidth <= 32 && In.Flags.isSExt()) ||
        (BitWidth < 32 && In.Flags.isZExt())) {

  if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)

  // The caller is responsible for loading the full value if the argument is
  // passed with CCValAssign::Indirect.

  // When the value is a scalable vector, we save the pointer which points to
  // the scalable vector value in the stack. The ValVT will be the pointer
  // type, instead of the scalable vector type.
                                 /*IsImmutable=*/true);
      ExtType, DL, LocVT, Chain, FIN,

  // Second half of f64 is passed on the stack.
                                 /*IsImmutable=*/true);
  // Second half of f64 is passed in another GPR.

// Transform physical registers into virtual registers.

  if (Subtarget.hasStdExtE())
                       "(Zdinx/D) instruction set extensions");

  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
          "Functions with the interrupt attribute cannot have arguments!");

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
          "Function interrupt attribute argument not supported!");

  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.

    // If the original argument was split and passed by reference (e.g. i128
    // on RV32), we need to load all parts of it here (using the same
    // address). Vectors may be partly split to registers and partly to the
    // stack, in which case the base address is partly offset and subsequent
    // stores are relative to that.
    unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
    unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
    while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
      unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;

  // Size of the vararg save area. For now, the varargs save area is either
  // zero or large enough to hold a0-a7.
  int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);

  // If all registers are allocated, then all varargs must be passed on the
  // stack and we don't need to save any argregs.
  if (VarArgsSaveSize == 0) {

  int VaArgOffset = -VarArgsSaveSize;

  // If saving an odd number of registers then create an extra stack slot to
  // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
  // offsets to even-numbered registers remain 2*XLEN-aligned.
                     XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes),
                     true);
  VarArgsSaveSize += XLenInBytes;

  // Copy the integer registers that may have been used for passing varargs
  // to the vararg save area.
  for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
        Chain, DL, ArgValue, FIN,
    OutChains.push_back(Store);

  // Record the frame index of the first variable argument
  // which is a value necessary to VASTART.

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
20321auto CalleeCC = CLI.CallConv;
20322auto &Outs = CLI.Outs;
20324auto CallerCC = Caller.getCallingConv();
20326// Exception-handling functions need a special set of instructions to 20327// indicate a return to the hardware. Tail-calling another function would 20328// probably break this. 20329// TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 20330// should be expanded as new function attributes are introduced. 20331if (Caller.hasFnAttribute(
"interrupt"))
20334// Do not tail call opt if the stack is used to pass parameters. 20338// Do not tail call opt if any parameters need to be passed indirectly. 20339// Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 20340// passed indirectly. So the address of the value will be passed in a 20341// register, or if not available, then the address is put on the stack. In 20342// order to pass indirectly, space on the stack often needs to be allocated 20343// in order to store the value. In this case the CCInfo.getNextStackOffset() 20344// != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 20345// are passed CCValAssign::Indirect. 20346for (
auto &VA : ArgLocs)
20350// Do not tail call opt if either caller or callee uses struct return 20352auto IsCallerStructRet = Caller.hasStructRetAttr();
20353auto IsCalleeStructRet = Outs.
empty() ?
false : Outs[0].Flags.isSRet();
20354if (IsCallerStructRet || IsCalleeStructRet)
20357// The callee has to preserve all registers the caller needs to preserve. 20359constuint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
20360if (CalleeCC != CallerCC) {
20361constuint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
20362if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20366// Byval parameters hand the function a pointer directly into the stack area 20367// we want to reuse during a tail call. Working around this *is* possible 20368// but less efficient and uglier in LowerCall. 20369for (
auto &Arg : Outs)
20370if (Arg.Flags.isByVal())
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.

  // Analyze the operands of the call, assigning locations to each operand.
  if (Subtarget.hasStdExtE())
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,

  // Check if it's really possible to do a tail call.
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
               "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.

  // Create local copies for byval args
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    if (!Flags.isByVal())

    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();
    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,

  // Copy argument values to their designated locations.
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
    SDValue ArgValue = OutVals[OutIdx];

    // Handle passing f64 on RV32D with a soft float ABI as a special case.

      // Get the CCValAssign for the Hi part.

        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())

      // Second half of f64 is passed in another GPR.
      RegsToPass.push_back(std::make_pair(RegHigh, Hi));

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
      // Store the argument in a stack slot and pass its address.

      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;

      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;

      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
      ArgValue = SpillSlot;

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    // Queue up the argument copies and emit them at the end.
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
         "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  bool CalleeIsLargeExternalSymbol = false;
  if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
  else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    CalleeIsLargeExternalSymbol = true;
  }

  // The first call operand is the chain and the second is the target address.

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");

  // Glue the call to the argument copies, if any.
           "Unexpected CFI type for a direct call");

  // Use software guarded branch for large code model non-indirect calls
  // Tail call to external symbol will have a null CLI.CB and we need another
  // way to determine the callsite type
  bool NeedSWGuarded = false;
      Subtarget.hasStdExtZicfilp() &&
    NeedSWGuarded = true;

  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);

  // Mark the end of the call, which is glued to the call itself.

  // Assign locations to each value returned by this call.
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out

    // Glue the RetValue to the end of the call sequence

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());

  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
                 /*IsFixed=*/true, /*IsRet=*/true, nullptr))

  // Stores the assignment of the return value to a location.

  // Info about the registers and stack slot.

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];

    // Handle returning f64 on RV32D with a soft float ABI.
                                DAG.getVTList(MVT::i32, MVT::i32), Val);
      Register RegHi = RVLocs[++i].getLocReg();
             "Return value register required, but has been reserved."});

    // Handle a 'normal' return.
             "Return value register required, but has been reserved."});

  // Guarantee that all emitted copies are stuck together.

  RetOps[0] = Chain; // Update chain.
  // Add the glue node if we have it.

  // Interrupt service routines use different return instructions.
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
          "Functions with the interrupt attribute must have void return type!");

    if (Kind == "supervisor")

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

void RISCVTargetLowering::validateCCReservedRegs(
          F, "Argument register required, but has been reserved."});

// Check if the result of the node is only used as a return value, as
// otherwise we can't perform a tail-call.
  if (N->getNumValues() != 1)
  if (!N->hasNUsesOfValue(1, 0))

  SDNode *Copy = *N->user_begin();

  // TODO: Handle additional opcodes in order to support tail-calling libcalls
  // with soft float ABIs.

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)

  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
  for (SDNode *Node : Copy->users()) {

  Chain = Copy->getOperand(0);
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
#undef NODE_NAME_CASE

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'S':
      // A symbolic address

  if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
  if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
std::pair<unsigned, const TargetRegisterClass *>

  // First, see if this is a constraint that directly corresponds to a RISC-V
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
        return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
      if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
        return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
      if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);

      if (VT == MVT::f16) {
        if (Subtarget.hasStdExtZfhmin())
          return std::make_pair(0U, &RISCV::FPR16RegClass);
        if (Subtarget.hasStdExtZhinxmin())
          return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
      } else if (VT == MVT::f32) {
        if (Subtarget.hasStdExtF())
          return std::make_pair(0U, &RISCV::FPR32RegClass);
        if (Subtarget.hasStdExtZfinx())
          return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
      } else if (VT == MVT::f64) {
        if (Subtarget.hasStdExtD())
          return std::make_pair(0U, &RISCV::FPR64RegClass);
        if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
          return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
        if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
          return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
      }

      return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
    }
  } else if (Constraint == "vr") {
    for (const auto *RC :
         {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
          &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
          &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
          &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
          &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
          &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
          &RISCV::VRN2M4RegClass}) {
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vd") {
    for (const auto *RC :
         {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
          &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
          &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
          &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
          &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
          &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
          &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
          &RISCV::VRN2M4NoV0RegClass}) {
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vm") {
    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  } else if (Constraint == "cr") {
    if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
      return std::make_pair(0U, &RISCV::GPRF16CRegClass);
    if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
      return std::make_pair(0U, &RISCV::GPRF32CRegClass);
    if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
      return std::make_pair(0U, &RISCV::GPRPairCRegClass);
    return std::make_pair(0U, &RISCV::GPRCRegClass);
  } else if (Constraint == "cR") {
    return std::make_pair(0U, &RISCV::GPRPairCRegClass);
  } else if (Constraint == "cf") {
    if (VT == MVT::f16) {
      if (Subtarget.hasStdExtZfhmin())
        return std::make_pair(0U, &RISCV::FPR16CRegClass);
      if (Subtarget.hasStdExtZhinxmin())
        return std::make_pair(0U, &RISCV::GPRF16CRegClass);
    } else if (VT == MVT::f32) {
      if (Subtarget.hasStdExtF())
        return std::make_pair(0U, &RISCV::FPR32CRegClass);
      if (Subtarget.hasStdExtZfinx())
        return std::make_pair(0U, &RISCV::GPRF32CRegClass);
    } else if (VT == MVT::f64) {
      if (Subtarget.hasStdExtD())
        return std::make_pair(0U, &RISCV::FPR64CRegClass);
      if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRPairCRegClass);
      if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRCRegClass);
  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
      .Case("{zero}", RISCV::X0)
      .Case("{ra}", RISCV::X1)
      .Case("{sp}", RISCV::X2)
      .Case("{gp}", RISCV::X3)
      .Case("{tp}", RISCV::X4)
      .Case("{t0}", RISCV::X5)
      .Case("{t1}", RISCV::X6)
      .Case("{t2}", RISCV::X7)
      .Cases("{s0}", "{fp}", RISCV::X8)
      .Case("{s1}", RISCV::X9)
      .Case("{a0}", RISCV::X10)
      .Case("{a1}", RISCV::X11)
      .Case("{a2}", RISCV::X12)
      .Case("{a3}", RISCV::X13)
      .Case("{a4}", RISCV::X14)
      .Case("{a5}", RISCV::X15)
      .Case("{a6}", RISCV::X16)
      .Case("{a7}", RISCV::X17)
      .Case("{s2}", RISCV::X18)
      .Case("{s3}", RISCV::X19)
      .Case("{s4}", RISCV::X20)
      .Case("{s5}", RISCV::X21)
      .Case("{s6}", RISCV::X22)
      .Case("{s7}", RISCV::X23)
      .Case("{s8}", RISCV::X24)
      .Case("{s9}", RISCV::X25)
      .Case("{s10}", RISCV::X26)
      .Case("{s11}", RISCV::X27)
      .Case("{t3}", RISCV::X28)
      .Case("{t4}", RISCV::X29)
      .Case("{t5}", RISCV::X30)
      .Case("{t6}", RISCV::X31)

  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
        .Cases("{f0}", "{ft0}", RISCV::F0_F)
        .Cases("{f1}", "{ft1}", RISCV::F1_F)
        .Cases("{f2}", "{ft2}", RISCV::F2_F)
        .Cases("{f3}", "{ft3}", RISCV::F3_F)
        .Cases("{f4}", "{ft4}", RISCV::F4_F)
        .Cases("{f5}", "{ft5}", RISCV::F5_F)
        .Cases("{f6}", "{ft6}", RISCV::F6_F)
        .Cases("{f7}", "{ft7}", RISCV::F7_F)
        .Cases("{f8}", "{fs0}", RISCV::F8_F)
        .Cases("{f9}", "{fs1}", RISCV::F9_F)
        .Cases("{f10}", "{fa0}", RISCV::F10_F)
        .Cases("{f11}", "{fa1}", RISCV::F11_F)
        .Cases("{f12}", "{fa2}", RISCV::F12_F)
        .Cases("{f13}", "{fa3}", RISCV::F13_F)
        .Cases("{f14}", "{fa4}", RISCV::F14_F)
        .Cases("{f15}", "{fa5}", RISCV::F15_F)
        .Cases("{f16}", "{fa6}", RISCV::F16_F)
        .Cases("{f17}", "{fa7}", RISCV::F17_F)
        .Cases("{f18}", "{fs2}", RISCV::F18_F)
        .Cases("{f19}", "{fs3}", RISCV::F19_F)
        .Cases("{f20}", "{fs4}", RISCV::F20_F)
        .Cases("{f21}", "{fs5}", RISCV::F21_F)
        .Cases("{f22}", "{fs6}", RISCV::F22_F)
        .Cases("{f23}", "{fs7}", RISCV::F23_F)
        .Cases("{f24}", "{fs8}", RISCV::F24_F)
        .Cases("{f25}", "{fs9}", RISCV::F25_F)
        .Cases("{f26}", "{fs10}", RISCV::F26_F)
        .Cases("{f27}", "{fs11}", RISCV::F27_F)
        .Cases("{f28}", "{ft8}", RISCV::F28_F)
        .Cases("{f29}", "{ft9}", RISCV::F29_F)
        .Cases("{f30}", "{ft10}", RISCV::F30_F)
        .Cases("{f31}", "{ft11}", RISCV::F31_F)

    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      if (VT == MVT::f32 || VT == MVT::Other)
        return std::make_pair(FReg, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
        .Case("{v0}", RISCV::V0)
        .Case("{v1}", RISCV::V1)
        .Case("{v2}", RISCV::V2)
        .Case("{v3}", RISCV::V3)
        .Case("{v4}", RISCV::V4)
        .Case("{v5}", RISCV::V5)
        .Case("{v6}", RISCV::V6)
        .Case("{v7}", RISCV::V7)
        .Case("{v8}", RISCV::V8)
        .Case("{v9}", RISCV::V9)
        .Case("{v10}", RISCV::V10)
        .Case("{v11}", RISCV::V11)
        .Case("{v12}", RISCV::V12)
        .Case("{v13}", RISCV::V13)
        .Case("{v14}", RISCV::V14)
        .Case("{v15}", RISCV::V15)
        .Case("{v16}", RISCV::V16)
        .Case("{v17}", RISCV::V17)
        .Case("{v18}", RISCV::V18)
        .Case("{v19}", RISCV::V19)
        .Case("{v20}", RISCV::V20)
        .Case("{v21}", RISCV::V21)
        .Case("{v22}", RISCV::V22)
        .Case("{v23}", RISCV::V23)
        .Case("{v24}", RISCV::V24)
        .Case("{v25}", RISCV::V25)
        .Case("{v26}", RISCV::V26)
        .Case("{v27}", RISCV::V27)
        .Case("{v28}", RISCV::V28)
        .Case("{v29}", RISCV::V29)
        .Case("{v30}", RISCV::V30)
        .Case("{v31}", RISCV::V31)

    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
      }
    }

  std::pair<Register, const TargetRegisterClass *> Res =

  // If we picked one of the Zfinx register classes, remap it to the GPR class.
  // FIXME: When Zfinx is supported in CodeGen this will need to take the
  // Subtarget into account.
  if (Res.second == &RISCV::GPRF16RegClass ||
      Res.second == &RISCV::GPRF32RegClass ||
      Res.second == &RISCV::GPRPairRegClass)
    return std::make_pair(Res.first, &RISCV::GPRRegClass);
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {

  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        if (isInt<12>(CVal))

      // Validate & create an integer zero operand.

      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        if (isUInt<5>(CVal))
  if (Subtarget.hasStdExtZtso()) {

  if (Subtarget.hasStdExtZtso()) {

  if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&

// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
// point operations can't be used in an lr/sc sequence without breaking the
// forward-progress guarantee.

  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())

  if (Subtarget.hasStdExtZacas() &&
      (Size >= 32 || Subtarget.hasStdExtZabha()))

  if (Size < 32 && !Subtarget.hasStdExtZabha())

      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;

      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;

  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LR/SC loop emitted by
  // int_riscv_masked_atomicrmw_xchg.

  unsigned XLen = Subtarget.getXLen();

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
        {AlignedAddr, Incr, Mask, SextShamt, Ordering});
    Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21680if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21689unsigned XLen = Subtarget.
getXLen();
21691Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21696 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21700 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21708// We have indexed loads for all supported EEW types. Indices are always 21723return Subtarget.hasStdExtZfhmin();
21725return Subtarget.hasStdExtF();
21727return Subtarget.hasStdExtD();
21734// If we are using the small code model, we can reduce size of jump table 21735// entry to 4 bytes. 21752// We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power 21753// of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be 21754// a power of two as well. 21755// FIXME: This doesn't work for zve32, but that's already broken 21756// elsewhere for the same reason. 21759"RVVBitsPerBlock changed, audit needed");
  // Target does not support indexed loads.
  if (!Subtarget.hasVendorXTHeadMemIdx())

  int64_t RHSC = RHS->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
  bool isLegalIndexedOffset = false;
  for (unsigned i = 0; i < 4; i++)
    if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
      isLegalIndexedOffset = true;

  if (!isLegalIndexedOffset)

    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  }

  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
    Base = LS->getBasePtr();
    if (Base == Op->getOperand(0))
    else if (Base == Op->getOperand(1))

    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  }

  // Post-indexing updates the base, so it's not a valid transform
  // if that's not the same as the load's pointer.

  // Zacas will use amocas.w which does not require extension.
                                               const Constant *PersonalityFn) const {

                                               const Constant *PersonalityFn) const {

// Return false to suppress the unnecessary extensions if the LibCall
// arguments or return value is a float narrower than XLEN on a soft FP ABI.

                                                  bool IsSigned) const {

  // Check integral scalar types.

  // Omit the optimization if the sub target has the M extension and the data
  // size exceeds XLen.
  const bool HasZmmul = Subtarget.hasStdExtZmmul();
    auto *ConstNode = cast<ConstantSDNode>(C);
    const APInt &Imm = ConstNode->getAPIntValue();

    // Break the MUL to a SLLI and an ADD/SUB.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())

    // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
    if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2()))

    // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
    // a pair of LUI/ADDI.
    if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
        ConstNode->hasOneUse()) {
      APInt ImmS = Imm.ashr(Imm.countr_zero());
      if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
          (1 - ImmS).isPowerOf2())
isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21981// Default to true and let the DAGCombiner decide. 21987unsigned *
Fast)
const{
21990 *
Fast = Subtarget.enableUnalignedScalarMem();
21991return Subtarget.enableUnalignedScalarMem();
21994// All vector implementations must support element alignment 22002// Note: We lower an unmasked unaligned vector access to an equally sized 22003// e8 element type access. Given this, we effectively support all unmasked 22004// misaligned accesses. TODO: Work through the codegen implications of 22005// allowing such accesses to be formed, and considered fast. 22007 *
Fast = Subtarget.enableUnalignedVectorMem();
22008return Subtarget.enableUnalignedVectorMem();
22017if (FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat))
22020// We use LMUL1 memory operations here for a non-obvious reason. Our caller 22021// has an expansion threshold, and we want the number of hardware memory 22022// operations to correspond roughly to that threshold. LMUL>1 operations 22023// are typically expanded linearly internally, and thus correspond to more 22024// than one actual memory operation. Note that store merging and load 22025// combining will typically form larger LMUL operations from the LMUL1 22026// operations emitted here, and that's okay because combining isn't 22027// introducing new memory operations; it's just merging existing ones. 22029if (
Op.size() < MinVLenInBytes)
22030// TODO: Figure out short memops. For the moment, do the default thing 22031// which ends up using scalar sequences. 22034// If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support 22039// Prefer i8 for non-zero memset as it allows us to avoid materializing 22040// a large scalar constant and instead use vmv.v.x/i to do the 22041// broadcast. For everything else, prefer ELenVT to minimize VL and thus 22042// maximize the chance we can encode the size in the vsetvli. 22044MVT PreferredVT = (
Op.isMemset() && !
Op.isZeroMemset()) ? MVT::i8 : ELenVT;
22046// Do we have sufficient alignment for our preferred VT? If not, revert 22047// to largest size allowed by our alignment criteria. 22048if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
22050if (
Op.isFixedDstAlign())
22051 RequiredAlign = std::min(RequiredAlign,
Op.getDstAlign());
22053 RequiredAlign = std::min(RequiredAlign,
Op.getSrcAlign());
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();

  MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
  if ((ValueVT == PairVT ||
       (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
        ValueVT == MVT::f64)) &&
      NumParts == 1 && PartVT == MVT::Untyped) {
    // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
    if (ValueVT == MVT::f64)

    // Always creating an MVT::Untyped part, so always use
    // RISCVISD::BuildGPRPair.
  }

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
  }

  [[maybe_unused]] unsigned ValLMUL =
  [[maybe_unused]] unsigned PartLMUL =
  assert(ValNF == PartNF && ValLMUL == PartLMUL &&
         "RISC-V vector tuple type only accepts same register class type "

  if (PartVTBitSize % ValueVTBitSize == 0) {
    assert(PartVTBitSize >= ValueVTBitSize);
    // If the element types are different, bitcast to the same element type of
    // PartVT first.
    // For example, to copy a <vscale x 1 x i8> value to <vscale x 4 x i16>,
    // we need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
    // subvector, then we can bitcast to <vscale x 4 x i16>.
    if (ValueEltVT != PartEltVT) {
      if (PartVTBitSize > ValueVTBitSize) {
        assert(Count != 0 && "The number of elements should not be zero.");
EVT ValueVT, std::optional<CallingConv::ID>
CC)
const{
22156bool IsABIRegCopy =
CC.has_value();
22158MVT PairVT = Subtarget.
is64Bit() ? MVT::i128 : MVT::i64;
22159if ((ValueVT == PairVT ||
22160 (!Subtarget.
is64Bit() && Subtarget.hasStdExtZdinx() &&
22161 ValueVT == MVT::f64)) &&
22162 NumParts == 1 && PartVT == MVT::Untyped) {
22163// Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx 22167// Always starting with an MVT::Untyped part, so always use 22168// RISCVISD::SplitGPRPair 22173if (ValueVT == MVT::f64)
22178if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22179 PartVT == MVT::f32) {
22182// Cast the f32 to i32, truncate to i16, and cast back to [b]f16. 22196if (PartVTBitSize % ValueVTBitSize == 0) {
22197assert(PartVTBitSize >= ValueVTBitSize);
22198EVT SameEltTypeVT = ValueVT;
22199// If the element types are different, convert it to the same element type 22201// Give an example here, we want copy a <vscale x 1 x i8> value from 22202// <vscale x 4 x i16>. 22203// We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first, 22204// then we can extract <vscale x 1 x i8>. 22205if (ValueEltVT != PartEltVT) {
22207assert(Count != 0 &&
"The number of element should not be zero.");
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);

  // Scalarizing zero_ext and sign_ext might stop them from matching widening
  // instructions in some cases.
  unsigned Opc = N->getOpcode();
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.

  // Android provides a fixed TLS slot for the stack cookie. See the definition
  // of TLS_SLOT_STACK_GUARD in
  // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h

  if (M->getStackProtectorGuard() == "tls") {
    // Users must specify the offset explicitly
    int Offset = M->getStackProtectorGuardOffset();
  // Don't lower vlseg/vsseg for vector types that can't be split.
  // ...

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;
    // ...
  }

  // The intrinsics for scalable vectors are not overloaded on pointer type
  // and can only handle the default address space.
  // ...

  // Need to make sure that EMUL * NFIELDS ≤ 8.
  return Factor * LMUL <= 8;
                                                  Align Alignment) const {
  // Only support fixed vectors if we know the minimum vector size.
  // ...

  if (!Subtarget.enableUnalignedVectorMem() &&
      /* ... */)
static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
///   %wide.vec = load <8 x i32>, <8 x i32>* %ptr
///   %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
///   %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
///   %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                     %ptr, i64 4)
///   %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
///   %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  // ...
  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  // ...

  // If the segment load is going to be performed segment at a time anyways
  // and there's only one element used, use a strided load instead. This
  // will be equally fast, and create less vector register pressure.
  // ...
  unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
  Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
  Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
  // ...
  CallInst *CI = Builder.CreateIntrinsic(
      Intrinsic::experimental_vp_strided_load,
      {VTy, BasePtr->getType(), Stride->getType()},
      {BasePtr, Stride, Mask, VL});
  // ...
  Shuffles[0]->replaceAllUsesWith(CI);
  // ...

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  // ...
  for (unsigned i = 0; i < Shuffles.size(); i++) {
    // ...
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }
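// Illustrative example (editorial addition, not from the upstream source):
// for a factor-4 interleave of i32 where only index 0 is extracted, e.g.
//   %v = shufflevector <16 x i32> %wide, <16 x i32> poison,
//                      <4 x i32> <i32 0, i32 4, i32 8, i32 12>
// the path above uses a strided load with Stride = 4 * 4 = 16 bytes and
// Offset = 0 instead of loading all four segments.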
static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};
/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
///   %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///            <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
///   store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
///   %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
///   %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
///   %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
///   call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                                %ptr, i64 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  // ...
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getDataLayout()))
    return false;
  // ...

  // If the segment store only has one active lane (i.e. the interleave is
  // just a spread shuffle), we can use a strided store instead. This will
  // be equally fast, and create less vector register pressure.
  // ...
  unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
  // ...
  auto *DataVTy = cast<FixedVectorType>(Data->getType());
  Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
  Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
  // ...
  Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
                          {Data->getType(), BasePtr->getType(),
                           Stride->getType()},
                          {Data, BasePtr, Stride, Mask, VL});
  // ...

  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
      SI->getModule(), FixedVssegIntrIds[Factor - 2],
      {VTy, SI->getPointerOperandType(), XLenTy});
  // ...
  for (unsigned i = 0; i < Factor; i++) {
    // ...
  }

  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
    LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
  unsigned Factor = DeinterleaveValues.size();
  // ...
  auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
  // ...

  if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
    Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
    // ...
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
    unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
    // The segment load returns a "riscv.vector.tuple" whose fields each cover
    // NumElts * SEW / 8 (scalable) bytes.
    // ...

    Builder.CreateIntrinsic(
        IntrIds[Factor - 2], {VecTupTy, XLenTy},
        {/* ... */ ConstantInt::get(XLenTy, Log2_64(SEW))});
    // ...
    for (unsigned i = 0; i < Factor; ++i) {
      Builder.CreateIntrinsic(Intrinsic::riscv_tuple_extract,
                              {ResVTy, VecTupTy}, {/* ... */});
      // ...
    }
  }

  // We have to create a brand new ExtractValue to replace each
  // of these old ExtractValue instructions.
  // ...
}

bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
    StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
  unsigned Factor = InterleaveValues.size();
  auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
  // ...
  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(), DL))
    return false;
  // ...

  if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
    Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});

    SmallVector<Value *> Ops(InterleaveValues.begin(), InterleaveValues.end());
    Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
    Ops.append({SI->getPointerOperand(), VL});
    Builder.CreateCall(VssegNFunc, Ops);
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
    unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
    Type *VecTupTy = TargetExtType::get(
        SI->getContext(), "riscv.vector.tuple",
        ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
                                NumElts * SEW / 8),
        Factor);

    Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
        SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
    // ...

    Value *StoredVal = PoisonValue::get(VecTupTy);
    for (unsigned i = 0; i < Factor; ++i)
      StoredVal = Builder.CreateIntrinsic(
          Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
          {StoredVal, InterleaveValues[i], Builder.getInt32(i)});

    Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
                                    ConstantInt::get(XLenTy, Log2_64(SEW))});
  }
  return true;
}
22616"Invalid call instruction for a KCFI check");
22618MBBI->getOpcode()));
22621Target.setIsRenamable(
false);
22629#define GET_REGISTER_MATCHER 22630#include "RISCVGenAsmMatcher.inc" 22636if (Reg == RISCV::NoRegister)
22638if (Reg == RISCV::NoRegister)
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 (the default value) works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
            /* ... */;

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;
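// Worked example (editorial addition, not from the upstream source): after
// the "-= 2" adjustment the two low bits select the NTL hint bits. A domain
// value of 5 (__RISCV_NTLH_ALL) becomes 3, so both nontemporal bits are set,
// while 2 (__RISCV_NTLH_INNERMOST_PRIVATE) becomes 0 and neither bit is set.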
  return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();

  return Subtarget.hasStdExtZbb() &&
         /* ... */;

  if (Subtarget.hasStdExtZalasr()) {
    if (Subtarget.hasStdExtZtso()) {
      // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
      // should be lowered to plain load/store. The easiest way to do this is
      // to say we should insert fences for them, and the fence insertion code
      // will just not insert any fences.
      auto *LI = dyn_cast<LoadInst>(I);
      auto *SI = dyn_cast<StoreInst>(I);
      // ...
      // Here, this is a load or store which is seq_cst, and needs a .aq or
      // .rl, therefore we shouldn't try to insert fences.
      // ...
      // Here, we are a TSO inst that isn't a seq_cst load/store.
      return isa<LoadInst>(I) || isa<StoreInst>(I);
    }
    // ...
  }

  // Note that one specific case requires fence insertion for an
  // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
  // than this hook due to limitations in the interface here.
  return isa<LoadInst>(I) || isa<StoreInst>(I);
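// Editorial note (not from the upstream source): with Zalasr a load-acquire
// can be encoded directly as lw.aq and a store-release as sw.rl, so no
// separate fences are needed for them; when Ztso is also enabled, plain loads
// and stores are already strongly ordered, which is why acquire/release
// accesses can be treated as "fenced" with no fences actually emitted.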
  // GISel support is in progress or complete for these opcodes.
  unsigned Op = Inst.getOpcode();
  if (Op == Instruction::Add || Op == Instruction::Sub ||
      Op == Instruction::And || Op == Instruction::Or ||
      Op == Instruction::Xor || Op == Instruction::InsertElement ||
      Op == Instruction::ShuffleVector || Op == Instruction::Load ||
      Op == Instruction::Freeze || Op == Instruction::Store)
    return false;
  // ...
  if (/* ... */ &&
      !isa<ReturnInst>(&Inst))
    return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  // ...
    return SDValue(N, 0); // Lower SDIV as SDIV

  // Only perform this transform if short forward branch opt is supported.
  if (!Subtarget.hasShortForwardBranchOpt())
    return SDValue();
  EVT VT = N->getValueType(0);
  if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
    return SDValue();

  // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();
  // ...
}

bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  // ...
}
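// Illustrative note (editorial addition, not from the upstream source): the
// usual signed division-by-power-of-two sequence is
//   x / 8  ==>  t = (x < 0) ? x + 7 : x;  result = t >> 3  (arithmetic shift)
// With short-forward-branch fusion the conditional "x + 7" becomes a cheap
// predicated add, and requiring 2**k - 1 < 2048 keeps the adjustment
// encodable in a single addi/addiw immediate (12-bit signed, max 2047).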
unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}

  if (Subtarget.hasStdExtZicfilp()) {
    // When Zicfilp is enabled, we need to use a software-guarded branch for
    // the jump-table branch.
    // ...
  }

// Jump table debug info is only needed if CodeView is enabled.
// ...

// If an output pattern produces multiple instructions tablegen may pick an
// arbitrary type from an instruction's destination register class to use for
// the VT of that MachineSDNode. This VT may be used to look up the
// representative register class. If the type isn't legal, the default
// implementation will not find a register class.
//
// Some integer types smaller than XLen are listed in the GPR register class to
// support isel patterns for GISel, but are not legal in SelectionDAG. The
// arbitrary type tablegen picks may be one of these smaller types.
//
// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
std::pair<const TargetRegisterClass *, uint8_t>
RISCVTargetLowering::findRepresentativeClass(/* ... */) const {
  // ...
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable

// If the function specifically requests inline stack probes, emit them.
// ...

unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
                                                Align StackAlign) const {
  // The default stack probe size is 4096 if the function has no
  // stack-probe-size attribute.
  unsigned StackProbeSize =
      MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  // Round down to the stack alignment.
  StackProbeSize &= ~(StackAlign.value() - 1);
  return StackProbeSize ? StackProbeSize : StackAlign.value();
}
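// Editorial note (assumption, not from the upstream source): the per-function
// probe size typically comes from the generic LLVM attributes, e.g.
//   attributes #0 = { "probe-stack"="inline-asm" "stack-probe-size"="8192" }
// and is rounded down to the stack alignment before the probing loop below
// uses it.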
  MaybeAlign Alignment =
      cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
  EVT VT = Op.getValueType();

  // Construct the new SP value in a GPR.
  // ...

  // Set the real SP to the new value with a probing loop.
  // ...

  Register TargetReg = MI.getOperand(1).getReg();
  // ...
  bool IsRV64 = Subtarget.is64Bit();
  // ...
  MF.insert(MBBInsertPoint, LoopTestMBB);
  // ...
  MF.insert(MBBInsertPoint, ExitMBB);
  // ...

  // ScratchReg = ProbeSize
  // ...
  // SUB SP, SP, ProbeSize
  // ...
  // s[d|w] zero, 0(sp)
  BuildMI(/* ... */, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
      /* ... */;
  // BLT TargetReg, SP, LoopTest
  // ...
  // Adjust with: MV SP, TargetReg.
  // ...

  MI.eraseFromParent();
  return ExitMBB->begin()->getParent();
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr Register SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static constexpr int Concat[]
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool sgt(const APInt &RHS) const
Signed greater than comparison.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
void clearAllBits()
Set every bit to 0.
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
bool isMask(unsigned numBits) const
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
APInt sext(unsigned width) const
Sign extend to a new width.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getCompareOperand()
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
bool all() const
all - Returns true if all bits are set.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Argument * getArg(unsigned i) const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Helper struct to store a base, index and offset that forms an address.
int64_t getOffset() const
bool hasExternalWeakLinkage() const
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
BasicBlock * GetInsertBlock() const
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
const MDOperand & getOperand(unsigned I) const
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
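A hedged sketch of the usual pattern behind these two calls when lowering formal arguments: record the physical argument register as a live-in and read it through a fresh virtual register (the register, type, and surrounding variables are illustrative).
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(RISCV::X10, VReg); // a0 carries the first integer argument
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);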
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
void setVarArgsFrameIndex(int Index)
int getVarArgsFrameIndex() const
void setVarArgsSaveSize(int Size)
void addSExt32Register(Register Reg)
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool useCCMovInsn() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
const RISCVFrameLowering * getFrameLowering() const override
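Illustrative only: the subtarget predicates listed above typically gate vector-lowering decisions along these lines (the exact checks in this file differ).
if (Subtarget.hasVInstructions() && Subtarget.useRVVForFixedLengthVectors() &&
    Subtarget.getRealMinVLen() >= 128) {
  // Fixed-length vectors can be widened into scalable RVV container types,
  // bounded by getMaxLMULForFixedLengthVectors() and the known VLEN range.
}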
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool lowerInterleaveIntrinsicToStore(StoreInst *SI, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
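A worked example of the quantity computed above, assuming the usual definition VLMAX = (VLEN / SEW) * LMUL with LMUL derived from MinSize / RISCV::RVVBitsPerBlock (64 bits per block); the concrete numbers are illustrative.
// VectorBits (VLEN) = 128, EltSize (SEW) = 32, MinSize = 64 (e.g. nxv2i32):
//   LMUL  = 64 / 64        = 1
//   VLMAX = (128 / 32) * 1 = 4 elements per register group
unsigned VLMax = RISCVTargetLowering::computeVLMAX(/*VectorBits=*/128,
                                                   /*EltSize=*/32,
                                                   /*MinSize=*/64);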
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, ArrayRef< Value * > DeinterleaveValues) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF value.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
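A minimal sketch combining this helper with getSelect: compare two values and pick one of them (the value types are illustrative; in-tree code queries getSetCCResultType for the comparison type).
SDValue Cond = DAG.getSetCC(DL, MVT::i1, LHS, RHS, ISD::SETLT);
SDValue Min  = DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS);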
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
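Illustrative use of the load/store helpers: reload a value from a fixed stack object and store it through another pointer (FrameIdx, DstPtr, PtrVT, Chain, and DL are assumed to be in scope).
SDValue FIN = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Val = DAG.getLoad(MVT::i64, DL, Chain, FIN,
                          MachinePointerInfo::getFixedStack(MF, FrameIdx));
SDValue St  = DAG.getStore(Val.getValue(1), DL, Val, DstPtr,
                           MachinePointerInfo(), Align(8));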
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
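A minimal sketch of the node factory above together with the constant helpers: build (X << 2) + 1 for a 64-bit value X already in the DAG (names are illustrative).
SDValue ShAmt = DAG.getShiftAmountConstant(2, MVT::i64, DL);
SDValue Shl   = DAG.getNode(ISD::SHL, DL, MVT::i64, X, ShAmt);
SDValue Res   = DAG.getNode(ISD::ADD, DL, MVT::i64, Shl,
                            DAG.getConstant(1, DL, MVT::i64));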
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
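Illustrative: the known-bits queries above are typically used in combines to prove that bits are already zero before dropping a mask or an extension (Op is a placeholder value).
KnownBits Known = DAG.computeKnownBits(Op);
bool UpperZero  = DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32));
// If UpperZero, the top 32 bits of Op are proven zero and a zero-extension
// of the low 32 bits is redundant.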
LLVMContext * getContext() const
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
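Sketch of the shuffle factory above: reverse a four-element vector with a constant mask (the value type and operand V are illustrative).
int ReverseMask[] = {3, 2, 1, 0};
SDValue Rev = DAG.getVectorShuffle(MVT::v4i32, DL, V,
                                   DAG.getUNDEF(MVT::v4i32), ReverseMask);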
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
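A worked example for the mask predicates above: the mask <1, 3, 5, 7> selects every second element starting at index 1, so it is a de-interleave mask of factor 2 with Index == 1 (values illustrative).
int M[] = {1, 3, 5, 7};
unsigned Index;
bool IsDeinterleave =
    ShuffleVectorInst::isDeInterleaveMaskOfFactor(M, /*Factor=*/2, Index);
// IsDeinterleave == true, Index == 1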
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getSplatIndex() const
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
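A hedged sketch of the StringSwitch helper above, in the style of a getRegisterByName implementation; the specific name-to-register mapping is illustrative.
Register Reg = StringSwitch<Register>(RegName)
                   .Case("sp", RISCV::X2)
                   .Case("gp", RISCV::X3)
                   .Case("tp", RISCV::X4)
                   .Default(Register());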
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
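Illustrative use of this hook inside a TargetLowering constructor: the action chosen per (opcode, type) pair decides whether legalization expands the node generically or calls back into LowerOperation (the specific opcodes here are examples, not a claim about this file's configuration).
setOperationAction(ISD::BSWAP, MVT::i64, Expand);  // expand via generic code
setOperationAction(ISD::SETCC, MVT::i64, Custom);  // route to LowerOperation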
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
unsigned getOperandNo() const
Return the operand # of this use in its User.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ UNDEF
UNDEF - An undefined node.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
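A hedged sketch of the condition-code helpers listed above; for an i32 comparison, inverting SETLT yields SETGE and swapping its operands yields SETGT:
// Hedged sketch of ISD condition-code algebra.
ISD::CondCode CC     = ISD::SETLT;
ISD::CondCode InvCC  = ISD::getSetCCInverse(CC, MVT::i32);   // ISD::SETGE
ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);     // ISD::SETGT
bool IsEquality      = ISD::isIntEqualitySetCC(CC);          // false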
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
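A hedged sketch of getOrInsertDeclaration for a non-overloaded intrinsic (M is an assumed llvm::Module pointer):
// Hedged sketch: llvm.trap takes no overloaded types, so Tys stays empty.
llvm::Function *TrapDecl =
    llvm::Intrinsic::getOrInsertDeclaration(M, llvm::Intrinsic::trap);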
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns an f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ STRICT_VFCVT_RTZ_XU_F_VL
@ TRUNCATE_VECTOR_VL_USAT
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ STRICT_VFROUND_NOEXCEPT_VL
@ SPLAT_VECTOR_SPLIT_I64_VL
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
@ TRUNCATE_VECTOR_VL_SSAT
@ STRICT_VFCVT_RTZ_X_F_VL
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
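A hedged sketch of the constant-materialization helpers above, assuming an MCSubtargetInfo &STI is available from the surrounding lowering code:
// Hedged sketch: expand a constant into an instruction sequence and query its
// materialization cost (an 8-byte XLen is assumed for the cost query).
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(0x12345678, STI);
int Cost = RISCVMatInt::getIntMatCost(APInt(64, 0x12345678), /*Size=*/8, STI,
                                      /*CompressionCost=*/false,
                                      /*FreeZeroes=*/false);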
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
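A hedged sketch of the vtype encode/decode round trips listed above (the enclosing RISCVVType namespace reflects how these helpers are grouped in the RISC-V MC layer):
// Hedged sketch: SEW=32 encodes to 2; LMUL=2 (non-fractional) round-trips.
unsigned VSEW = RISCVVType::encodeSEW(32);                    // 2
unsigned SEW  = RISCVVType::decodeVSEW(VSEW);                 // 32
RISCVII::VLMUL VLMul = RISCVVType::encodeLMUL(2, /*Fractional=*/false);
auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);     // {2, false}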
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
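A hedged sketch of how the fclass-style bitmasks above compose; the RISCV:: namespace placement is an assumption:
// Hedged sketch: fclass categories combine by bitwise OR.
unsigned AnyNaN = RISCV::FPMASK_Quiet_NaN | RISCV::FPMASK_Signaling_NaN;
unsigned AnyInf =
    RISCV::FPMASK_Negative_Infinity | RISCV::FPMASK_Positive_Infinity;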
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
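A hedged sketch of the libcall-selection helpers above:
// Hedged sketch: pick the runtime-library call for an f64 -> i64 conversion.
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i64);
bool HasCall = (LC != RTLIB::UNKNOWN_LIBCALL);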
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
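A hedged sketch of the bit-math helpers listed above, with illustrative values:
// Hedged sketch: power-of-two, log2, and trailing-zero utilities.
bool P2    = llvm::isPowerOf2_64(64);        // true
unsigned L = llvm::Log2_64(64);              // 6
uint64_t C = llvm::PowerOf2Ceil(100);        // 128
int TZ     = llvm::countr_zero(40u);         // 3 (40 = 0b101000)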
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
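A hedged sketch combining the shuffle-mask utilities listed above (createSequentialMask, narrowShuffleMaskElts, widenShuffleMaskElts):
// Hedged sketch: build the mask 0..7, split each lane in two, then widen back.
llvm::SmallVector<int, 16> Mask =
    llvm::createSequentialMask(/*Start=*/0, /*NumInts=*/8, /*NumUndefs=*/0);
llvm::SmallVector<int, 16> Narrow;
llvm::narrowShuffleMaskElts(/*Scale=*/2, Mask, Narrow);               // 0..15
llvm::SmallVector<int, 16> Wide;
bool Widened = llvm::widenShuffleMaskElts(/*Scale=*/2, Narrow, Wide); // 0..7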
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
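A hedged sketch using the SDValue constant predicates listed above; the helper name is illustrative and V is assumed to come from surrounding combine code:
// Hedged sketch: classify an operand as all-zeros or all-ones, including
// constant splat vectors.
static bool isZeroOrAllOnes(SDValue V) {
  if (isNullConstant(V) || isAllOnesConstant(V))
    return true;
  if (ConstantSDNode *C = isConstOrConstSplat(V, /*AllowUndefs=*/true))
    return C->isZero() || C->isAllOnes();
  return false;
}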
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static constexpr roundingMode rmNearestTiesToEven
static unsigned int semanticsPrecision(const fltSemantics &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
uint64_t getScalarStoreSize() const
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple value type.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
bool isInteger() const
Return true if this is an integer or a vector integer type.
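A hedged sketch of the EVT queries above (Ctx is an assumed LLVMContext):
// Hedged sketch: fixed-width vector type algebra with EVT.
EVT VT     = EVT::getVectorVT(Ctx, MVT::i8, 16);        // v16i8
EVT EltVT  = VT.getVectorElementType();                 // i8
EVT WideVT = VT.changeVectorElementType(MVT::i32);      // v16i32
TypeSize Bits = VT.getSizeInBits();                     // 128 fixed bits
bool Scalable = VT.isScalableVector();                  // false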
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
void resetAll()
Resets the known state of all bits.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
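A hedged sketch of the KnownBits queries above, with illustrative values:
// Hedged sketch: an 8-bit value whose top four bits are known to be zero.
KnownBits Known(8);                               // nothing known yet
bool Unknown = Known.isUnknown();                 // true
Known.Zero.setHighBits(4);                        // bits 4..7 known zero
unsigned MaxActive = Known.countMaxActiveBits();  // 4
KnownBits Wide = Known.zext(16);                  // new high bits known zero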
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
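A hedged sketch of the MachinePointerInfo constructors above (MF is an assumed MachineFunction reference and FI an assumed frame index):
// Hedged sketch: describe a fixed-stack slot and an 8-byte offset into it.
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachinePointerInfo OffInfo = PtrInfo.getWithOffset(8);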
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
const ConstantInt * CFIType
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
bool isAfterLegalizeDAG() const
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool recursivelyDeleteUnusedNodes(SDNode *N)
bool isBeforeLegalize() const
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)