1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This implements the TargetLowering class. 11//===----------------------------------------------------------------------===// 39/// NOTE: The TargetMachine owns TLOF. 51/// Check whether a given call node is in tail position within its function. If 52/// so, it sets Chain to the input chain of the tail call. 57// First, check if tail calls have been disabled in this function. 58if (
F.getFnAttribute(
"disable-tail-calls").getValueAsBool())
61// Conservatively require the attributes of the call to match those of 62// the return. Ignore following attributes because they don't affect the 64AttrBuilder CallerAttrs(
F.getContext(),
F.getAttributes().getRetAttrs());
65for (
constauto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef,
68 Attribute::Range, Attribute::NoFPClass})
74// It's not safe to eliminate the sign / zero extension of the return value. 75if (CallerAttrs.
contains(Attribute::ZExt) ||
76 CallerAttrs.
contains(Attribute::SExt))
79// Check if the only use is a function return node. 87for (
unsignedI = 0, E = ArgLocs.
size();
I != E; ++
I) {
92// Only look at callee saved registers. 95// Check that we pass the value used for the caller. 96// (We look for a CopyFromReg reading a virtual register that is used 97// for the function live-in value of register Reg) 103Register ArgReg = cast<RegisterSDNode>(
Value->getOperand(1))->getReg();
104if (
MRI.getLiveInPhysReg(ArgReg) != Reg)
110/// Set CallLoweringInfo attribute flags based on a call instruction 111/// and called function attributes. 114IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
117IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
118IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
119IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
120IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
121IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
122IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
123IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
124IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
125IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
126IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
127Alignment = Call->getParamStackAlign(ArgIdx);
130"multiple ABI attributes?");
144/// Generate a libcall taking the given operands as arguments and returning a 145/// result of type RetVT. 146std::pair<SDValue, SDValue>
156 Args.reserve(Ops.
size());
159for (
unsigned i = 0; i < Ops.
size(); ++i) {
162 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.
getContext());
165 Entry.IsZExt = !Entry.IsSExt;
169 Entry.IsSExt = Entry.IsZExt =
false;
171 Args.push_back(Entry);
174if (LC == RTLIB::UNKNOWN_LIBCALL)
182bool zeroExtend = !signExtend;
186 signExtend = zeroExtend =
false;
197return LowerCallTo(CLI);
201 std::vector<EVT> &MemOps,
unsigned Limit,
constMemOp &
Op,
unsigned DstAS,
203if (Limit != ~
unsigned(0) &&
Op.isMemcpyWithFixedDstAlign() &&
204Op.getSrcAlign() <
Op.getDstAlign())
209if (VT == MVT::Other) {
210// Use the largest integer type whose alignment constraints are satisfied. 211// We only need to check DstAlign here as SrcAlign is always greater or 212// equal to DstAlign (or zero). 213 VT = MVT::LAST_INTEGER_VALUETYPE;
214if (
Op.isFixedDstAlign())
220// Find the largest legal integer type. 221MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
226// If the type we've chosen is larger than the largest legal integer type 227// then use that instead. 232unsigned NumMemOps = 0;
236while (VTSize >
Size) {
237// For now, only use non-vector load / store's for the left-over pieces. 247elseif (NewVT == MVT::i64 &&
250// i64 is usually not legal on 32-bit targets, but f64 may be. 265// If the new VT cannot cover all of the remaining bits, then consider 266// issuing a (or a pair of) unaligned and overlapping load / store. 268if (NumMemOps &&
Op.allowOverlap() && NewVTSize <
Size &&
270 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
280if (++NumMemOps > Limit)
283 MemOps.push_back(VT);
290/// Soften the operands of a comparison. This code is shared among BR_CC, 291/// SELECT_CC, and SETCC handlers. 298return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
308bool IsSignaling)
const{
309// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc 310// not supporting it. We can update this code when libgcc provides such 313assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
314 &&
"Unsupported setcc type!");
316// Expand into one or more soft-fp libcall(s). 317RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
318bool ShouldInvertCC =
false;
322 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
323 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
324 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
328 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
329 (VT == MVT::f64) ? RTLIB::UNE_F64 :
330 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
334 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
335 (VT == MVT::f64) ? RTLIB::OGE_F64 :
336 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
340 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
341 (VT == MVT::f64) ? RTLIB::OLT_F64 :
342 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
346 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
347 (VT == MVT::f64) ? RTLIB::OLE_F64 :
348 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
352 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
353 (VT == MVT::f64) ? RTLIB::OGT_F64 :
354 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
357 ShouldInvertCC =
true;
360 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
361 (VT == MVT::f64) ? RTLIB::UO_F64 :
362 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
366 ShouldInvertCC =
true;
369 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
370 (VT == MVT::f64) ? RTLIB::UO_F64 :
371 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
372 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
373 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
374 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
377// Invert CC for unordered comparisons 378 ShouldInvertCC =
true;
381 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
382 (VT == MVT::f64) ? RTLIB::OGE_F64 :
383 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
386 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
387 (VT == MVT::f64) ? RTLIB::OGT_F64 :
388 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
391 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
392 (VT == MVT::f64) ? RTLIB::OLE_F64 :
393 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
396 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
397 (VT == MVT::f64) ? RTLIB::OLT_F64 :
398 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
404// Use the target specific return value for comparison lib calls. 406SDValue Ops[2] = {NewLHS, NewRHS};
411auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
418 CCCode = getSetCCInverse(CCCode, RetVT);
421if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
428auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
431 CCCode = getSetCCInverse(CCCode, RetVT);
432 NewLHS = DAG.
getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
442/// Return the entry encoding for a jump table in the current function. The 443/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. 445// In non-pic modes, just use the address of a block. 446if (!isPositionIndependent())
449// In PIC mode, if the target supports a GPRel32 directive, use it. 453// Otherwise, use a label difference. 459// If our PIC model is GP relative, use the global offset table as the base. 460unsigned JTEncoding = getJumpTableEncoding();
469/// This returns the relocation base for the given PIC jumptable, the same as 470/// getPICJumpTableRelocBase, but as an MCExpr. 474// The normal PIC reloc base is the label at the start of the jump table. 482// Jump table debug info is only needed if CodeView is enabled. 494// If the address is not even local to this DSO we will have to load it from 495// a got and then add the offset. 496if (!TM.shouldAssumeDSOLocal(GV))
499// If the code is position independent we will have to add a base register. 500if (isPositionIndependent())
503// Otherwise we can do it. 507//===----------------------------------------------------------------------===// 508// Optimization Methods 509//===----------------------------------------------------------------------===// 511/// If the specified instruction has a constant integer operand and there are 512/// bits set in that constant that are not demanded, then clear those bits and 516constAPInt &DemandedElts,
519unsigned Opcode =
Op.getOpcode();
521// Early-out if we've ended up calling an undemanded node, leave this to 526// Do target-specific constant optimization. 527if (targetShrinkDemandedConstant(
Op,
DemandedBits, DemandedElts, TLO))
530// FIXME: ISD::SELECT, ISD::SELECT_CC 537auto *Op1C = dyn_cast<ConstantSDNode>(
Op.getOperand(1));
538if (!Op1C || Op1C->isOpaque())
541// If this is a 'not' op, don't touch it because that's a canonical form. 542constAPInt &
C = Op1C->getAPIntValue();
547EVT VT =
Op.getValueType();
564EVT VT =
Op.getValueType();
571/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. 572/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, 573/// but it could be generalized for targets with other types of implicit 579"ShrinkDemandedOp only supports binary operators!");
580assert(
Op.getNode()->getNumValues() == 1 &&
581"ShrinkDemandedOp only supports nodes with one result!");
583EVT VT =
Op.getValueType();
587// Early return, as this function cannot handle vector types. 592Op.getOperand(1).getValueType().getScalarSizeInBits() ==
BitWidth &&
593"ShrinkDemandedOp only supports operands that have the same size!");
595// Don't do this if the node has another user, which may require the 597if (!
Op.getNode()->hasOneUse())
600// Search for the smallest integer type with free casts to and from 601// Op's type. For expedience, just check power-of-2 integer types. 607// We found a type with free casts. 609// If the operation has the 'disjoint' flag, then the 610// operands on the new node are also disjoint. 614Op.getOpcode(), dl, SmallVT,
617assert(DemandedSize <= SmallVTBits &&
"Narrowed below demanded bits?");
632bool Simplified = SimplifyDemandedBits(
Op,
DemandedBits, Known, TLO);
641constAPInt &DemandedElts,
661bool AssumeSingleUse)
const{
662EVT VT =
Op.getValueType();
664// Since the number of lanes in a scalable vector is unknown at compile time, 665// we track one bit which is implicitly broadcast to all lanes. This means 666// that all lanes in a scalable vector are considered demanded. 674// TODO: Under what circumstances can we create nodes? Constant folding? 678EVT VT =
Op.getValueType();
680// Limit search depth. 688// Not demanding any bits/elts from Op. 696switch (
Op.getOpcode()) {
702EVT SrcVT = Src.getValueType();
703EVT DstVT =
Op.getValueType();
709if (NumSrcEltBits == NumDstEltBits)
710if (
SDValue V = SimplifyMultipleUseDemandedBits(
714if (SrcVT.
isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
715unsigned Scale = NumDstEltBits / NumSrcEltBits;
719for (
unsigned i = 0; i != Scale; ++i) {
720unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
721unsigned BitOffset = EltOffset * NumSrcEltBits;
724 DemandedSrcBits |= Sub;
725for (
unsigned j = 0; j != NumElts; ++j)
727 DemandedSrcElts.
setBit((j * Scale) + i);
731if (
SDValue V = SimplifyMultipleUseDemandedBits(
732 Src, DemandedSrcBits, DemandedSrcElts, DAG,
Depth + 1))
736// TODO - bigendian once we have test coverage. 737if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
738unsigned Scale = NumSrcEltBits / NumDstEltBits;
742for (
unsigned i = 0; i != NumElts; ++i)
743if (DemandedElts[i]) {
744unsignedOffset = (i % Scale) * NumDstEltBits;
746 DemandedSrcElts.
setBit(i / Scale);
749if (
SDValue V = SimplifyMultipleUseDemandedBits(
750 Src, DemandedSrcBits, DemandedSrcElts, DAG,
Depth + 1))
759/*PoisonOnly=*/false))
767// If all of the demanded bits are known 1 on one side, return the other. 768// These bits cannot contribute to the result of the 'and' in this 771returnOp.getOperand(0);
773returnOp.getOperand(1);
780// If all of the demanded bits are known zero on one side, return the 781// other. These bits cannot contribute to the result of the 'or' in this 784returnOp.getOperand(0);
786returnOp.getOperand(1);
793// If all of the demanded bits are known zero on one side, return the 796returnOp.getOperand(0);
798returnOp.getOperand(1);
804returnOp.getOperand(0);
808returnOp.getOperand(1);
812// If we are only demanding sign bits then we can use the shift source 814if (std::optional<uint64_t> MaxSA =
817unsigned ShAmt = *MaxSA;
818unsigned NumSignBits =
821if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
827// If we are only demanding sign bits then we can use the shift source 829if (std::optional<uint64_t> MaxSA =
832unsigned ShAmt = *MaxSA;
833// Must already be signbits in DemandedBits bounds, and can't demand any 836unsigned NumSignBits =
848// If (1) we only need the sign-bit, (2) the setcc operands are the same 849// width as the setcc result, and (3) the result of a setcc conforms to 0 or 850// -1, we may be able to bypass the setcc. 855// If we're testing X < 0, then this compare isn't needed - just use X! 856// FIXME: We're limiting to integer types here, but this should also work 857// if we don't care about FP signed-zero. The use of SETLT with FP means 858// that we don't care about NaNs. 866// If none of the extended bits are demanded, eliminate the sextinreg. 868EVT ExVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
873// If the input is already sign extended, just drop the extension. 875if (NumSignBits >= (
BitWidth - ExBits + 1))
885// If we only want the lowest element and none of extended bits, then we can 886// return the bitcasted source vector. 888EVT SrcVT = Src.getValueType();
889EVT DstVT =
Op.getValueType();
890if (IsLE && DemandedElts == 1 &&
901// If we don't demand the inserted element, return the base vector. 903auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
906 !DemandedElts[CIdx->getZExtValue()])
919// If we don't demand the inserted subvector, return the base vector. 920if (DemandedSubElts == 0)
928// If all the demanded elts are from one operand and are inline, 929// then we can use the operand directly. 930bool AllUndef =
true, IdentityLHS =
true, IdentityRHS =
true;
931for (
unsigned i = 0; i != NumElts; ++i) {
932int M = ShuffleMask[i];
933if (M < 0 || !DemandedElts[i])
936 IdentityLHS &= (M == (int)i);
937 IdentityRHS &= ((M - NumElts) == i);
943returnOp.getOperand(0);
945returnOp.getOperand(1);
949// TODO: Probably okay to remove after audit; here to reduce change size 950// in initial enablement patch for scalable vectors 955if (
SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
966EVT VT =
Op.getValueType();
967// Since the number of lanes in a scalable vector is unknown at compile time, 968// we track one bit which is implicitly broadcast to all lanes. This means 969// that all lanes in a scalable vector are considered demanded. 973return SimplifyMultipleUseDemandedBits(
Op,
DemandedBits, DemandedElts, DAG,
981return SimplifyMultipleUseDemandedBits(
Op,
DemandedBits, DemandedElts, DAG,
985// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1). 986// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1). 993"SRL or SRA node is required here!");
994// Is the right shift using an immediate value of 1? 996if (!N1C || !N1C->
isOne())
999// We are looking for an avgfloor 1001// or one of these as a avgceil 1002// add(add(ext, ext), 1) 1003// add(add(ext, 1), ext) 1004// add(ext, add(ext, 1)) 1036// If the shift is signed (sra): 1037// - Needs >= 2 sign bit for both operands. 1038// - Needs >= 2 zero bits. 1039// If the shift is unsigned (srl): 1040// - Needs >= 1 zero bit for both operands. 1041// - Needs 1 demanded bit zero and >= 2 sign bits. 1043unsigned ShiftOpc =
Op.getOpcode();
1044bool IsSigned =
false;
1048unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1053unsigned NumZero = std::min(NumZeroA, NumZeroB);
1059if (NumZero >= 2 && NumSigned < NumZero) {
1064if (NumSigned >= 1) {
1072if (NumZero >= 1 && NumSigned < NumZero) {
1089// Find the smallest power-2 type that is legal for this vector size and 1090// operation, given the original type size and the number of known sign/zero 1092EVT VT =
Op.getValueType();
1101// If we could not transform, and (both) adds are nuw/nsw, we can use the 1102// larger type size to do the transform. 1106Add.getOperand(1)) &&
1114// Don't create a AVGFLOOR node with a scalar constant unless its legal as 1115// this is likely to stop other folds (reassociation, value tracking etc.) 1117 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1127/// Look at Op. At this point, we know that only the OriginalDemandedBits of the 1128/// result of Op are ever used downstream. If we can use this information to 1129/// simplify Op, create a new simplified DAG node and return true, returning the 1130/// original and new nodes in Old and New. Otherwise, analyze the expression and 1131/// return a mask of Known bits for the expression (used to simplify the 1132/// caller). The Known bits may only be accurate for those bits in the 1133/// OriginalDemandedBits and OriginalDemandedElts. 1137unsignedDepth,
bool AssumeSingleUse)
const{
1140"Mask size mismatches value type size!");
1142// Don't know anything. 1145EVT VT =
Op.getValueType();
1147unsigned NumElts = OriginalDemandedElts.
getBitWidth();
1149"Unexpected vector size");
1152APInt DemandedElts = OriginalDemandedElts;
1159// We can't simplify target constants. 1164// We know all of the bits for a constant! 1170// We know all of the bits for a floating point constant! 1172 cast<ConstantFPSDNode>(
Op)->getValueAPF().bitcastToAPInt());
1176// Other users may use these bits. 1177bool HasMultiUse =
false;
1178if (!AssumeSingleUse && !
Op.getNode()->hasOneUse()) {
1180// Limit search depth. 1183// Allow multiple uses, just set the DemandedBits/Elts to all bits. 1187 }
elseif (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1188// Not demanding any bits/elts from Op. 1191// Limit search depth. 1196switch (
Op.getOpcode()) {
1200if (!DemandedElts[0])
1205unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1207if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO,
Depth + 1))
1210// Upper elements are undef, so only get the knownbits if we just demand 1211// the bottom element. 1212if (DemandedElts == 1)
1217// Collect the known bits that are shared by every demanded element. 1218// TODO: Call SimplifyDemandedBits for non-constant demanded elements. 1220returnfalse;
// Don't fall through, will infinitely loop. 1225if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO,
Depth + 1))
1228// Implicitly truncate the bits to match the official semantics of 1234auto *LD = cast<LoadSDNode>(
Op);
1235if (getTargetConstantFromLoad(LD)) {
1237returnfalse;
// Don't fall through, will infinitely loop. 1240// If this is a ZEXTLoad and we are looking at the loaded value. 1241EVT MemVT = LD->getMemoryVT();
1244returnfalse;
// Don't fall through, will infinitely loop. 1253auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
1256// If index isn't constant, assume we need all vector elements AND the 1258APInt DemandedVecElts(DemandedElts);
1260unsignedIdx = CIdx->getZExtValue();
1263// Inserted element is not required. 1264if (!DemandedElts[
Idx])
1271if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO,
Depth + 1))
1277if (SimplifyDemandedBits(Vec,
DemandedBits, DemandedVecElts, KnownVec, TLO,
1281if (!!DemandedVecElts)
1289// Demand any elements from the subvector and the remainder from the src its 1296APInt DemandedSrcElts = DemandedElts;
1300if (SimplifyDemandedBits(Sub,
DemandedBits, DemandedSubElts, KnownSub, TLO,
1303if (SimplifyDemandedBits(Src,
DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1309if (!!DemandedSubElts)
1311if (!!DemandedSrcElts)
1314// Attempt to avoid multi-use src if we don't need anything from it. 1317SDValue NewSub = SimplifyMultipleUseDemandedBits(
1319SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1321if (NewSub || NewSrc) {
1322 NewSub = NewSub ? NewSub : Sub;
1323 NewSrc = NewSrc ? NewSrc : Src;
1334// Offset the demanded elts by the subvector index. 1336if (Src.getValueType().isScalableVector())
1339unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1342if (SimplifyDemandedBits(Src,
DemandedBits, DemandedSrcElts, Known, TLO,
1346// Attempt to avoid multi-use src if we don't need anything from it. 1348SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1363EVT SubVT =
Op.getOperand(0).getValueType();
1366for (
unsigned i = 0; i != NumSubVecs; ++i) {
1367APInt DemandedSubElts =
1368 DemandedElts.
extractBits(NumSubElts, i * NumSubElts);
1369if (SimplifyDemandedBits(
Op.getOperand(i),
DemandedBits, DemandedSubElts,
1370 Known2, TLO,
Depth + 1))
1372// Known bits are shared by every demanded subvector element. 1373if (!!DemandedSubElts)
1382// Collect demanded elements from shuffle operands.. 1383APInt DemandedLHS, DemandedRHS;
1388if (!!DemandedLHS || !!DemandedRHS) {
1395if (SimplifyDemandedBits(Op0,
DemandedBits, DemandedLHS, Known2, TLO,
1401if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedRHS, Known2, TLO,
1407// Attempt to avoid multi-use ops if we don't need anything from them. 1408SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1410SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1412if (DemandedOp0 || DemandedOp1) {
1413 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1414 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1425// If the RHS is a constant, check to see if the LHS would be zero without 1426// using the bits from the RHS. Below, we use knowledge about the RHS to 1427// simplify the LHS, here we're using information from the LHS to simplify 1430// Do not increment Depth here; that can cause an infinite loop. 1432// If the LHS already has zeros where RHSC does, this 'and' is dead. 1437// If any of the set bits in the RHS are known zero on the LHS, shrink 1443// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its 1444// constant, but if this 'and' is only clearing bits that were just set by 1445// the xor, then this 'and' can be eliminated by shrinking the mask of 1446// the xor. For example, for a 32-bit X: 1447// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 1449 LHSKnown.
One == ~RHSC->getAPIntValue()) {
1455// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) 1456// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). 1461unsigned NumSubElts =
1478if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedElts, Known, TLO,
1482 Known2, TLO,
Depth + 1))
1485// If all of the demanded bits are known one on one side, return the other. 1486// These bits cannot contribute to the result of the 'and'. 1491// If all of the demanded bits in the inputs are known zeros, return zero. 1494// If the RHS is a constant, see if we can simplify it. 1498// If the operation can be done in a smaller type, do so. 1502// Attempt to avoid multi-use ops if we don't need anything from them. 1504SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1506SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1508if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1522if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedElts, Known, TLO,
1529 Known2, TLO,
Depth + 1)) {
1534// If all of the demanded bits are known zero on one side, return the other. 1535// These bits cannot contribute to the result of the 'or'. 1540// If the RHS is a constant, see if we can simplify it. 1543// If the operation can be done in a smaller type, do so. 1547// Attempt to avoid multi-use ops if we don't need anything from them. 1549SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1551SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1553if (DemandedOp0 || DemandedOp1) {
1554 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1555 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1561// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2)) 1562// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks. 1565// Attempt to match all commutations - m_c_Or would've been useful! 1566for (
intI = 0;
I != 2; ++
I) {
1569SDValue Alt =
Op.getOperand(1 -
I).getOperand(0);
1570SDValue C2 =
Op.getOperand(1 -
I).getOperand(1);
1572for (
int J = 0; J != 2; ++J) {
1595if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedElts, Known, TLO,
1598if (SimplifyDemandedBits(Op0,
DemandedBits, DemandedElts, Known2, TLO,
1602// If all of the demanded bits are known zero on one side, return the other. 1603// These bits cannot contribute to the result of the 'xor'. 1608// If the operation can be done in a smaller type, do so. 1612// If all of the unknown bits are known to be zero on one side or the other 1613// turn this into an *inclusive* or. 1614// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 1620// If one side is a constant, and all of the set bits in the constant are 1621// also known set on the other side, turn this into an AND, as we know 1622// the bits will be cleared. 1623// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 1624// NB: it is okay if more bits are known than are requested 1625if (
C->getAPIntValue() == Known2.
One) {
1631// If the RHS is a constant, see if we can change it. Don't alter a -1 1632// constant because that's a 'not' op, and that is better for combining 1634if (!
C->isAllOnes() &&
DemandedBits.isSubsetOf(
C->getAPIntValue())) {
1635// We're flipping all demanded bits. Flip the undemanded bits too. 1644// Don't crash on an oversized shift. We can not guarantee that a 1645// bogus shift has been simplified to undef. 1646if (ShiftC->getAPIntValue().ult(
BitWidth)) {
1647uint64_t ShiftAmt = ShiftC->getZExtValue();
1650 : Ones.
lshr(ShiftAmt);
1652 isDesirableToCommuteXorWithShift(
Op.getNode())) {
1653// If the xor constant is a demanded mask, do a 'not' before the 1655// xor (X << ShiftC), XorC --> (not X) << ShiftC 1656// xor (X >> ShiftC), XorC --> (not X) >> ShiftC 1666// If we can't turn this into a 'not', try to shrink the constant. 1667if (!
C || !
C->isAllOnes())
1671// Attempt to avoid multi-use ops if we don't need anything from them. 1673SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1675SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1677if (DemandedOp0 || DemandedOp1) {
1678 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1679 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1689if (SimplifyDemandedBits(
Op.getOperand(2),
DemandedBits, DemandedElts,
1690 Known, TLO,
Depth + 1))
1692if (SimplifyDemandedBits(
Op.getOperand(1),
DemandedBits, DemandedElts,
1693 Known2, TLO,
Depth + 1))
1696// If the operands are constants, see if we can simplify them. 1700// Only known if known in both the LHS and RHS. 1704if (SimplifyDemandedBits(
Op.getOperand(2),
DemandedBits, DemandedElts,
1705 Known, TLO,
Depth + 1))
1707if (SimplifyDemandedBits(
Op.getOperand(1),
DemandedBits, DemandedElts,
1708 Known2, TLO,
Depth + 1))
1711// Only known if known in both the LHS and RHS. 1715if (SimplifyDemandedBits(
Op.getOperand(3),
DemandedBits, DemandedElts,
1716 Known, TLO,
Depth + 1))
1718if (SimplifyDemandedBits(
Op.getOperand(2),
DemandedBits, DemandedElts,
1719 Known2, TLO,
Depth + 1))
1722// If the operands are constants, see if we can simplify them. 1726// Only known if known in both the LHS and RHS. 1733// If (1) we only need the sign-bit, (2) the setcc operands are the same 1734// width as the setcc result, and (3) the result of a setcc conforms to 0 or 1735// -1, we may be able to bypass the setcc. 1740// If we're testing X < 0, then this compare isn't needed - just use X! 1741// FIXME: We're limiting to integer types here, but this should also work 1742// if we don't care about FP signed-zero. The use of SETLT with FP means 1743// that we don't care about NaNs. 1748// TODO: Should we check for other forms of sign-bit comparisons? 1749// Examples: X <= -1, X >= 0 1762if (std::optional<uint64_t> KnownSA =
1764unsigned ShAmt = *KnownSA;
1768// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a 1769// single shift. We can do this if the bottom bits (which are shifted 1770// out) are never demanded. 1771// TODO - support non-uniform vector amounts. 1774if (std::optional<uint64_t> InnerSA =
1776unsigned C1 = *InnerSA;
1778int Diff = ShAmt - C1;
1790// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits 1791// are not demanded. This will likely allow the anyext to be folded away. 1792// TODO - support non-uniform vector amounts. 1797if (ShAmt < InnerBits &&
DemandedBits.getActiveBits() <= InnerBits &&
1798 isTypeDesirableForOp(
ISD::SHL, InnerVT)) {
1806// Repeat the SHL optimization above in cases where an extension 1807// intervenes: (shl (anyext (shr x, c1)), c2) to 1808// (shl (anyext x), c2-c1). This requires that the bottom c1 bits 1809// aren't demanded (as above) and that the shifted upper c1 bits of 1810// x aren't demanded. 1811// TODO - support non-uniform vector amounts. 1815 InnerOp, DemandedElts,
Depth + 2)) {
1816unsigned InnerShAmt = *SA2;
1817if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1819 (InnerBits - InnerShAmt + ShAmt) &&
1833if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1835// Disable the nsw and nuw flags. We can no longer guarantee that we 1836// won't wrap after simplification. 1840 Known.
Zero <<= ShAmt;
1841 Known.
One <<= ShAmt;
1842// low bits known zero. 1845// Attempt to avoid multi-use ops if we don't need anything from them. 1847SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1848 Op0, InDemandedMask, DemandedElts, TLO.
DAG,
Depth + 1);
1855// TODO: Can we merge this fold with the one below? 1856// Try shrinking the operation as long as the shift amount will still be 1859Op.getNode()->hasOneUse()) {
1860// Search for the smallest integer type with free casts to and from 1861// Op's type. For expedience, just check power-of-2 integer types. 1867 isTypeDesirableForOp(
ISD::SHL, SmallVT) &&
1870assert(DemandedSize <= SmallVTBits &&
1871"Narrowed below demanded bits?");
1872// We found a type with free casts. 1883// Narrow shift to lower half - similar to ShrinkDemandedOp. 1884// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K)) 1885// Only do this if we demand the upper half so the knownbits are correct. 1891 isTypeDesirableForOp(
ISD::SHL, HalfVT) &&
1894// If we're demanding the upper bits at all, we must ensure 1895// that the upper bits of the shift result are known to be zero, 1896// which is equivalent to the narrow shift being NUW. 1900 Flags.setNoSignedWrap(IsNSW);
1901 Flags.setNoUnsignedWrap(IsNUW);
1906 NewShiftAmt, Flags);
1914// This is a variable shift, so we can't shift the demand mask by a known 1915// amount. But if we are not demanding high bits, then we are not 1916// demanding those bits from the pre-shifted operand either. 1919if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1921// Disable the nsw and nuw flags. We can no longer guarantee that we 1922// won't wrap after simplification. 1930// If we are only demanding sign bits then we can use the shift source 1932if (std::optional<uint64_t> MaxSA =
1934unsigned ShAmt = *MaxSA;
1935unsigned NumSignBits =
1938if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1948if (std::optional<uint64_t> KnownSA =
1950unsigned ShAmt = *KnownSA;
1954// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a 1955// single shift. We can do this if the top bits (which are shifted out) 1956// are never demanded. 1957// TODO - support non-uniform vector amounts. 1960if (std::optional<uint64_t> InnerSA =
1962unsigned C1 = *InnerSA;
1964int Diff = ShAmt - C1;
1976// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a 1977// single sra. We can do this if the top bits are never demanded. 1980if (std::optional<uint64_t> InnerSA =
1982unsigned C1 = *InnerSA;
1983// Clamp the combined shift amount if it exceeds the bit width. 1984unsigned Combined = std::min(C1 + ShAmt,
BitWidth - 1);
1994// If the shift is exact, then it does demand the low bits (and knows that 1996if (
Op->getFlags().hasExact())
1999// Narrow shift to lower half - similar to ShrinkDemandedOp. 2000// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K)) 2005 isTypeDesirableForOp(
ISD::SRL, HalfVT) &&
2020// Compute the new bits that are at the top now. 2021if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2026// High bits known zero. 2029// Attempt to avoid multi-use ops if we don't need anything from them. 2031SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2032 Op0, InDemandedMask, DemandedElts, TLO.
DAG,
Depth + 1);
2039// Use generic knownbits computation as it has support for non-uniform 2044// If we are only demanding sign bits then we can use the shift source 2046if (std::optional<uint64_t> MaxSA =
2048unsigned ShAmt = *MaxSA;
2049// Must already be signbits in DemandedBits bounds, and can't demand any 2050// shifted in zeroes. 2052unsigned NumSignBits =
2059// Try to match AVG patterns (after shift simplification). 2061 DemandedElts,
Depth + 1))
2071// If we only want bits that already match the signbit then we don't need 2078// If this is an arithmetic shift right and only the low-bit is set, we can 2079// always convert this into a logical shr, even if the shift amount is 2080// variable. The low bit of the shift cannot be an input sign bit unless 2081// the shift amount is >= the size of the datatype, which is undefined. 2085if (std::optional<uint64_t> KnownSA =
2087unsigned ShAmt = *KnownSA;
2091// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target 2092// supports sext_inreg. 2094if (std::optional<uint64_t> InnerSA =
2102if (*InnerSA == ShAmt) {
2110// Even if we can't convert to sext_inreg, we might be able to 2111// remove this shift pair if the input is already sign extended. 2112unsigned NumSignBits =
2114if (NumSignBits > ShAmt)
2122// If the shift is exact, then it does demand the low bits (and knows that 2124if (
Op->getFlags().hasExact())
2127// If any of the demanded bits are produced by the sign extension, we also 2128// demand the input sign bit. 2132if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2138// If the input sign bit is known to be zero, or if none of the top bits 2139// are demanded, turn this into an unsigned shift right. 2143 Flags.setExact(
Op->getFlags().hasExact());
2150// The bit must come from the sign. 2156// New bits are known one. 2159// Attempt to avoid multi-use ops if we don't need anything from them. 2161SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2162 Op0, InDemandedMask, DemandedElts, TLO.
DAG,
Depth + 1);
2170// Try to match AVG patterns (after shift simplification). 2172 DemandedElts,
Depth + 1))
2185unsigned Amt = SA->getAPIntValue().urem(
BitWidth);
2187// For fshl, 0-shift returns the 1st arg. 2188// For fshr, 0-shift returns the 2nd arg. 2190if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1,
DemandedBits, DemandedElts,
2191 Known, TLO,
Depth + 1))
2196// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt)) 2197// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt) 2200if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2203if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2213// Attempt to avoid multi-use ops if we don't need anything from them. 2216SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2217 Op0, Demanded0, DemandedElts, TLO.
DAG,
Depth + 1);
2218SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2219 Op1, Demanded1, DemandedElts, TLO.
DAG,
Depth + 1);
2220if (DemandedOp0 || DemandedOp1) {
2221 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2222 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2230// For pow-2 bitwidths we only demand the bottom modulo amt bits. 2233if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2234 Known2, TLO,
Depth + 1))
2245// If we're rotating an 0/-1 value, then it stays an 0/-1 value. 2250unsigned Amt = SA->getAPIntValue().urem(
BitWidth);
2253// rotl: (Op0 << Amt) | (Op0 >> (BW - Amt)) 2254// rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt) 2256if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2264// See if we don't demand either half of the rotated bits. 2266DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2271DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2277// For pow-2 bitwidths we only demand the bottom modulo amt bits. 2280if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2290unsigned Opc =
Op.getOpcode();
2294// If we're only demanding signbits, then we can simplify to OR/AND node. 2297unsigned NumSignBits =
2301if (NumSignBits >= NumDemandedUpperBits)
2304// Check if one arg is always less/greater than (or equal) to the other arg. 2342if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2352// If the only bits demanded come from one byte of the bswap result, 2353// just shift the input byte into position to eliminate the bswap. 2357// Round NTZ down to the next byte. If we have 11 trailing zeros, then 2358// we need all the bits down to bit 8. Likewise, round NLZ. If we 2359// have 14 leading zeros, round to 8. 2362// If we need exactly one byte, we can do this transformation. 2364// Replace this with either a left or right shift to get the byte into 2368unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2376if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2384// If only 1 bit is demanded, replace with PARITY as long as we're before 2386// FIXME: Limit to scalars for now. 2396EVT ExVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
2399// If we only care about the highest bit, don't bother shifting right. 2401unsigned MinSignedBits =
2403bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2404// However if the input is already sign extended we expect the sign 2405// extension to be dropped altogether later and do not simplify. 2406if (!AlreadySignExtended) {
2407// Compute the correct shift amount type, which must be getShiftAmountTy 2408// for scalar types after legalization. 2416// If none of the extended bits are demanded, eliminate the sextinreg. 2422// Since the sign extended bits are demanded, we know that the sign 2424 InputDemandedBits.
setBit(ExVTBits - 1);
2426if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2430// If the sign bit of the input is known set or clear, then we know the 2431// top bits of the result. 2433// If the input sign bit is known zero, convert this into a zero extension. 2434if (Known.
Zero[ExVTBits - 1])
2438if (Known.
One[ExVTBits - 1]) {
// Input sign bit known set 2441 }
else {
// Input sign bit unknown 2448EVT HalfVT =
Op.getOperand(0).getValueType();
2456if (SimplifyDemandedBits(
Op.getOperand(0), MaskLo, KnownLo, TLO,
Depth + 1))
2459if (SimplifyDemandedBits(
Op.getOperand(1), MaskHi, KnownHi, TLO,
Depth + 1))
2462 Known = KnownHi.
concat(KnownLo);
2471EVT SrcVT = Src.getValueType();
2476// If none of the top bits are demanded, convert this into an any_extend. 2478// If we only need the non-extended bits of the bottom element 2479// then we can just bitcast to the result. 2480if (IsLE && IsVecInReg && DemandedElts == 1 &&
2491APInt InDemandedElts = DemandedElts.
zext(InElts);
2492if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2500// Attempt to avoid multi-use ops if we don't need anything from them. 2501if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502 Src, InDemandedBits, InDemandedElts, TLO.
DAG,
Depth + 1))
2512EVT SrcVT = Src.getValueType();
2517APInt InDemandedElts = DemandedElts.
zext(InElts);
2520// Since some of the sign extended bits are demanded, we know that the sign 2522 InDemandedBits.
setBit(InBits - 1);
2524// If none of the top bits are demanded, convert this into an any_extend. 2526// If we only need the non-extended bits of the bottom element 2527// then we can just bitcast to the result. 2528if (IsLE && IsVecInReg && DemandedElts == 1 &&
2532// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans. 2543if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2548// If the sign bit is known one, the top bits match. 2551// If the sign bit is known zero, convert this to a zero extend. 2563// Attempt to avoid multi-use ops if we don't need anything from them. 2564if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2565 Src, InDemandedBits, InDemandedElts, TLO.
DAG,
Depth + 1))
2575EVT SrcVT = Src.getValueType();
2580// If we only need the bottom element then we can just bitcast. 2581// TODO: Handle ANY_EXTEND? 2582if (IsLE && IsVecInReg && DemandedElts == 1 &&
2587APInt InDemandedElts = DemandedElts.
zext(InElts);
2588if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2594// Attempt to avoid multi-use ops if we don't need anything from them. 2595if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2596 Src, InDemandedBits, InDemandedElts, TLO.
DAG,
Depth + 1))
2603// Simplify the input, using demanded bit information, and compute the known 2604// zero/one bits live out. 2605unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2607if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2609// Disable the nsw and nuw flags. We can no longer guarantee that we 2610// won't wrap after simplification. 2616// Attempt to avoid multi-use ops if we don't need anything from them. 2617if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2618 Src, TruncMask, DemandedElts, TLO.
DAG,
Depth + 1))
2621// If the input is only used by this truncate, see if we can shrink it based 2622// on the known demanded bits. 2623switch (Src.getOpcode()) {
2627// Shrink SRL by a constant if none of the high bits shifted in are 2630// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is 2634if (Src.getNode()->hasOneUse()) {
2637// If truncate is only free at trunc(srl), do not turn it into 2638// srl(trunc). The check is done by first check the truncate is free 2639// at Src's opcode(srl), then check the truncate is not done by 2640// referencing sub-register. In test, if both trunc(srl) and 2641// srl(trunc)'s trunc are free, srl(trunc) performs better. If only 2642// trunc(srl)'s trunc is free, trunc(srl) is better. 2646 std::optional<uint64_t> ShAmtC =
2657// None of the shifted in bits are needed. Add a truncate of the 2658// shift input, then shift it. 2672// AssertZext demands all of the high bits, plus any of the low bits 2673// demanded by its users. 2674EVT ZVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
2676if (SimplifyDemandedBits(
Op.getOperand(0), ~InMask |
DemandedBits, Known,
2680 Known.
Zero |= ~InMask;
2681 Known.
One &= (~Known.Zero);
2687ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2688unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2693// Demand the bits from every vector element without a constant index. 2696if (
auto *CIdx = dyn_cast<ConstantSDNode>(
Idx))
2697if (CIdx->getAPIntValue().ult(NumSrcElts))
2700// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know 2701// anything about the extended bits. 2704 DemandedSrcBits = DemandedSrcBits.
trunc(EltBitWidth);
2706if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2710// Attempt to avoid multi-use ops if we don't need anything from them. 2712if (
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2713 Src, DemandedSrcBits, DemandedSrcElts, TLO.
DAG,
Depth + 1)) {
2729EVT SrcVT = Src.getValueType();
2732// If this is an FP->Int bitcast and if the sign bit is the only 2733// thing demanded, turn this into a FGETSIGN. 2739if ((OpVTLegal || i32Legal) && VT.
isSimple() && SrcVT != MVT::f16 &&
2740 SrcVT != MVT::f128) {
2741// Cannot eliminate/lower SHL for f128 yet. 2742EVT Ty = OpVTLegal ? VT : MVT::i32;
2743// Make a FGETSIGN + SHL to move the sign bit into the appropriate 2744// place. We expect the SHL to be eliminated by other optimizations. 2746unsigned OpVTSizeInBits =
Op.getValueSizeInBits();
2747if (!OpVTLegal && OpVTSizeInBits > 32)
2749unsigned ShVal =
Op.getValueSizeInBits() - 1;
2756// Bitcast from a vector using SimplifyDemanded Bits/VectorElts. 2757// Demand the elt/bit if any of the original elts/bits are demanded. 2759unsigned Scale =
BitWidth / NumSrcEltBits;
2763for (
unsigned i = 0; i != Scale; ++i) {
2764unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2765unsigned BitOffset = EltOffset * NumSrcEltBits;
2768 DemandedSrcBits |= Sub;
2769for (
unsigned j = 0; j != NumElts; ++j)
2771 DemandedSrcElts.
setBit((j * Scale) + i);
2775APInt KnownSrcUndef, KnownSrcZero;
2776if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2777 KnownSrcZero, TLO,
Depth + 1))
2781if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2782 KnownSrcBits, TLO,
Depth + 1))
2784 }
elseif (IsLE && (NumSrcEltBits %
BitWidth) == 0) {
2785// TODO - bigendian once we have test coverage. 2786unsigned Scale = NumSrcEltBits /
BitWidth;
2790for (
unsigned i = 0; i != NumElts; ++i)
2791if (DemandedElts[i]) {
2794 DemandedSrcElts.
setBit(i / Scale);
2798APInt KnownSrcUndef, KnownSrcZero;
2799if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2800 KnownSrcZero, TLO,
Depth + 1))
2805if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2806 KnownSrcBits, TLO,
Depth + 1))
2809// Attempt to avoid multi-use ops if we don't need anything from them. 2811if (
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2812 Src, DemandedSrcBits, DemandedSrcElts, TLO.
DAG,
Depth + 1)) {
2819// If this is a bitcast, let computeKnownBits handle it. Only do this on a 2820// recursive call where Known may be useful to the caller. 2829// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. 2830// If we demand exactly one bit N and we have "X * (C' << N)" where C' is 2831// odd (has LSB set), then the left-shifted low bit of X is the answer. 2834if (
C &&
C->getAPIntValue().countr_zero() == CTZ) {
2840// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: 2841// X * X is odd iff X is odd. 2842// 'Quadratic Reciprocity': X * X -> 0 for bit[1] 2851// Add, Sub, and Mul don't demand any bits in positions beyond that 2852// of the highest bit demanded of them. 2853SDValue Op0 =
Op.getOperand(0), Op1 =
Op.getOperand(1);
2858auto GetDemandedBitsLHSMask = [&](
APInt Demanded,
2864if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2866 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2867 DemandedElts, KnownOp0, TLO,
Depth + 1) ||
2868// See if the operation should be performed at a smaller bit width. 2870// Disable the nsw and nuw flags. We can no longer guarantee that we 2871// won't wrap after simplification. 2876// neg x with only low bit demanded is simply x. 2881// Attempt to avoid multi-use ops if we don't need anything from them. 2883SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2884 Op0, LoMask, DemandedElts, TLO.
DAG,
Depth + 1);
2885SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2886 Op1, LoMask, DemandedElts, TLO.
DAG,
Depth + 1);
2887if (DemandedOp0 || DemandedOp1) {
2888 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2889 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2896// If we have a constant operand, we may be able to turn it into -1 if we 2897// do not demand the high bits. This can make the constant smaller to 2898// encode, allow more general folding, or match specialized instruction 2899// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that 2900// is probably not useful (and could be detrimental). 2903if (
C && !
C->isAllOnes() && !
C->isOne() &&
2904 (
C->getAPIntValue() | HighMask).isAllOnes()) {
2906// Disable the nsw and nuw flags. We can no longer guarantee that we 2907// won't wrap after simplification. 2913// Match a multiply with a disguised negated-power-of-2 and convert to a 2914// an equivalent shift-left amount. 2915// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) 2916auto getShiftLeftAmt = [&HighMask](
SDValueMul) ->
unsigned {
2920// Don't touch opaque constants. Also, ignore zero and power-of-2 2921// multiplies. Those will get folded later. 2942// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) 2943if (
unsigned ShAmt = getShiftLeftAmt(Op0))
2945// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC)) 2946if (
unsigned ShAmt = getShiftLeftAmt(Op1))
2947return foldMul(
ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2950// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC)) 2951if (
unsigned ShAmt = getShiftLeftAmt(Op1))
2952return foldMul(
ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2958 }
else {
// Op.getOpcode() is either ISD::ADD or ISD::SUB. 2960Op.getOpcode() ==
ISD::ADD, Flags.hasNoSignedWrap(),
2961 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2966// We also ask the target about intrinsics (which could be specific to it). 2969// TODO: Probably okay to remove after audit; here to reduce change size 2970// in initial enablement patch for scalable vectors 2971if (
Op.getValueType().isScalableVector())
2973if (SimplifyDemandedBitsForTargetNode(
Op,
DemandedBits, DemandedElts,
2979// Just use computeKnownBits to compute output bits. 2984// If we know the value of all of the demanded bits, return this as a 2986if (!isTargetCanonicalConstantNode(
Op) &&
2988// Avoid folding to a constant if any OpaqueConstant is involved. 2990 auto *C = dyn_cast<ConstantSDNode>(V);
2991 return C && C->isOpaque();
3002// A multi use 'all demanded elts' simplify failed to find any knownbits. 3003// Try again just for the original demanded elts. 3004// Ensure we do this AFTER constant folding above. 3012constAPInt &DemandedElts,
3018APInt KnownUndef, KnownZero;
3020 SimplifyDemandedVectorElts(
Op, DemandedElts, KnownUndef, KnownZero, TLO);
3029/// Given a vector binary operation and known undefined elements for each input 3030/// operand, compute whether each element of the output is undefined. 3032constAPInt &UndefOp0,
3033constAPInt &UndefOp1) {
3036"Vector binop only");
3041 UndefOp1.
getBitWidth() == NumElts &&
"Bad type for undef analysis");
3043auto getUndefOrConstantElt = [&](
SDValue V,
unsigned Index,
3044constAPInt &UndefVals) {
3045if (UndefVals[Index])
3048if (
auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3049// Try hard to make sure that the getNode() call is not creating temporary 3050// nodes. Ignore opaque integers because they do not constant fold. 3052auto *
C = dyn_cast<ConstantSDNode>(Elt);
3053if (isa<ConstantFPSDNode>(Elt) || Elt.
isUndef() || (
C && !
C->isOpaque()))
3061for (
unsigned i = 0; i != NumElts; ++i) {
3062// If both inputs for this element are either constant or undef and match 3063// the element type, compute the constant/undef result for this element of 3065// TODO: Ideally we would use FoldConstantArithmetic() here, but that does 3066// not handle FP constants. The code within getNode() should be refactored 3067// to avoid the danger of creating a bogus temporary node here. 3080bool AssumeSingleUse)
const{
3081EVT VT =
Op.getValueType();
3082unsigned Opcode =
Op.getOpcode();
3083APInt DemandedElts = OriginalDemandedElts;
3089if (!shouldSimplifyDemandedVectorElts(
Op, TLO))
3092// TODO: For now we assume we know nothing about scalable vectors. 3097"Mask size mismatches value type element count!");
3105// If Op has other users, assume that all elements are needed. 3106if (!AssumeSingleUse && !
Op.getNode()->hasOneUse())
3109// Not demanding any elements from Op. 3110if (DemandedElts == 0) {
3115// Limit search depth. 3123// Helper for demanding the specified elements and all the bits of both binary 3125auto SimplifyDemandedVectorEltsBinOp = [&](
SDValue Op0,
SDValue Op1) {
3126SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3128SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3130if (NewOp0 || NewOp1) {
3133 NewOp1 ? NewOp1 : Op1,
Op->getFlags());
3141if (!DemandedElts[0]) {
3149EVT SrcVT = Src.getValueType();
3161if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3171EVT SrcVT = Src.getValueType();
3173// We only handle vectors here. 3174// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? 3178// Fast handling of 'identity' bitcasts. 3180if (NumSrcElts == NumElts)
3181return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3182 KnownZero, TLO,
Depth + 1);
3184APInt SrcDemandedElts, SrcZero, SrcUndef;
3186// Bitcast from 'large element' src vector to 'small element' vector, we 3187// must demand a source element if any DemandedElt maps to it. 3188if ((NumElts % NumSrcElts) == 0) {
3189unsigned Scale = NumElts / NumSrcElts;
3191if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3195// Try calling SimplifyDemandedBits, converting demanded elts to the bits 3196// of the large element. 3197// TODO - bigendian once we have test coverage. 3201for (
unsigned i = 0; i != NumElts; ++i)
3202if (DemandedElts[i]) {
3203unsigned Ofs = (i % Scale) * EltSizeInBits;
3204 SrcDemandedBits.
setBits(Ofs, Ofs + EltSizeInBits);
3208if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3212// The bitcast has split each wide element into a number of 3213// narrow subelements. We have just computed the Known bits 3214// for wide elements. See if element splitting results in 3215// some subelements being zero. Only for demanded elements! 3216for (
unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3220for (
unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3221unsigned Elt = Scale * SrcElt + SubElt;
3222if (DemandedElts[Elt])
3228// If the src element is zero/undef then all the output elements will be - 3229// only demanded elements are guaranteed to be correct. 3230for (
unsigned i = 0; i != NumSrcElts; ++i) {
3231if (SrcDemandedElts[i]) {
3233 KnownZero.
setBits(i * Scale, (i + 1) * Scale);
3235 KnownUndef.
setBits(i * Scale, (i + 1) * Scale);
3240// Bitcast from 'small element' src vector to 'large element' vector, we 3241// demand all smaller source elements covered by the larger demanded element 3243if ((NumSrcElts % NumElts) == 0) {
3244unsigned Scale = NumSrcElts / NumElts;
3246if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3250// If all the src elements covering an output element are zero/undef, then 3251// the output element will be as well, assuming it was demanded. 3252for (
unsigned i = 0; i != NumElts; ++i) {
3253if (DemandedElts[i]) {
3266/*PoisonOnly=*/false))
3269// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE 3270// freeze(op(x, ...)) -> op(freeze(x), ...). 3278// Check all elements and simplify any unused elements with UNDEF. 3280// Don't simplify BROADCASTS. 3282 [&](
SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3285for (
unsigned i = 0; i != NumElts; ++i) {
3286if (!DemandedElts[i] && !Ops[i].
isUndef()) {
3296for (
unsigned i = 0; i != NumElts; ++i) {
3298if (
SrcOp.isUndef()) {
3300 }
elseif (EltSizeInBits ==
SrcOp.getScalarValueSizeInBits() &&
3308EVT SubVT =
Op.getOperand(0).getValueType();
3311for (
unsigned i = 0; i != NumSubVecs; ++i) {
3314APInt SubUndef, SubZero;
3315if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3318 KnownUndef.
insertBits(SubUndef, i * NumSubElts);
3319 KnownZero.
insertBits(SubZero, i * NumSubElts);
3322// Attempt to avoid multi-use ops if we don't need anything from them. 3324bool FoundNewSub =
false;
3326for (
unsigned i = 0; i != NumSubVecs; ++i) {
3329SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3330 SubOp, SubElts, TLO.
DAG,
Depth + 1);
3331 DemandedSubOps.
push_back(NewSubOp ? NewSubOp : SubOp);
3332 FoundNewSub = NewSubOp ?
true : FoundNewSub;
3343// Demand any elements from the subvector and the remainder from the src its 3350APInt DemandedSrcElts = DemandedElts;
3353APInt SubUndef, SubZero;
3354if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3358// If none of the src operand elements are demanded, replace it with undef. 3359if (!DemandedSrcElts && !Src.isUndef())
3364if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3370// Attempt to avoid multi-use ops if we don't need anything from them. 3372SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3373 Src, DemandedSrcElts, TLO.
DAG,
Depth + 1);
3374SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3375 Sub, DemandedSubElts, TLO.
DAG,
Depth + 1);
3376if (NewSrc || NewSub) {
3377 NewSrc = NewSrc ? NewSrc : Src;
3378 NewSub = NewSub ? NewSub : Sub;
3380 NewSub,
Op.getOperand(2));
3387// Offset the demanded elts by the subvector index. 3389if (Src.getValueType().isScalableVector())
3392unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3395APInt SrcUndef, SrcZero;
3396if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3402// Attempt to avoid multi-use ops if we don't need anything from them. 3404SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3405 Src, DemandedSrcElts, TLO.
DAG,
Depth + 1);
3417auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
3419// For a legal, constant insertion index, if we don't need this insertion 3420// then strip it, else remove it from the demanded elts. 3421if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3422unsignedIdx = CIdx->getZExtValue();
3423if (!DemandedElts[
Idx])
3426APInt DemandedVecElts(DemandedElts);
3428if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3429 KnownZero, TLO,
Depth + 1))
3438APInt VecUndef, VecZero;
3439if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3442// Without knowing the insertion index we can't set KnownUndef/KnownZero. 3450// Try to transform the select condition based on the current demanded 3452APInt UndefSel, ZeroSel;
3453if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3457// See if we can simplify either vselect operand. 3458APInt DemandedLHS(DemandedElts);
3459APInt DemandedRHS(DemandedElts);
3460APInt UndefLHS, ZeroLHS;
3461APInt UndefRHS, ZeroRHS;
3462if (SimplifyDemandedVectorElts(
LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3465if (SimplifyDemandedVectorElts(
RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3469 KnownUndef = UndefLHS & UndefRHS;
3470 KnownZero = ZeroLHS & ZeroRHS;
3472// If we know that the selected element is always zero, we don't need the 3473// select value element. 3474APInt DemandedSel = DemandedElts & ~KnownZero;
3475if (DemandedSel != DemandedElts)
3476if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3487// Collect demanded elements from shuffle operands.. 3488APInt DemandedLHS(NumElts, 0);
3489APInt DemandedRHS(NumElts, 0);
3490for (
unsigned i = 0; i != NumElts; ++i) {
3491int M = ShuffleMask[i];
3492if (M < 0 || !DemandedElts[i])
3494assert(0 <= M && M < (
int)(2 * NumElts) &&
"Shuffle index out of range");
3495if (M < (
int)NumElts)
3498 DemandedRHS.
setBit(M - NumElts);
3501// See if we can simplify either shuffle operand. 3502APInt UndefLHS, ZeroLHS;
3503APInt UndefRHS, ZeroRHS;
3504if (SimplifyDemandedVectorElts(
LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3507if (SimplifyDemandedVectorElts(
RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3511// Simplify mask using undef elements from LHS/RHS. 3513bool IdentityLHS =
true, IdentityRHS =
true;
3515for (
unsigned i = 0; i != NumElts; ++i) {
3519if (!DemandedElts[i] || (M < (
int)NumElts && UndefLHS[M]) ||
3520 (M >= (
int)NumElts && UndefRHS[M - NumElts])) {
3524 IdentityLHS &= (M < 0) || (M == (
int)i);
3525 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3528// Update legal shuffle masks based on demanded elements if it won't reduce 3529// to Identity which can cause premature removal of the shuffle mask. 3530if (Updated && !IdentityLHS && !IdentityRHS && !TLO.
LegalOps) {
3532 buildLegalVectorShuffle(VT,
DL,
LHS,
RHS, NewMask, TLO.
DAG);
3537// Propagate undef/zero elements from LHS/RHS. 3538for (
unsigned i = 0; i != NumElts; ++i) {
3539int M = ShuffleMask[i];
3542 }
elseif (M < (
int)NumElts) {
3548if (UndefRHS[M - NumElts])
3550if (ZeroRHS[M - NumElts])
3559APInt SrcUndef, SrcZero;
3561unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3562APInt DemandedSrcElts = DemandedElts.
zext(NumSrcElts);
3563if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3570Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3571 DemandedSrcElts == 1) {
3572// aext - if we just need the bottom element then we can bitcast. 3577// zext(undef) upper bits are guaranteed to be zero. 3582// zext - if we just need the bottom element then we can mask: 3583// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. 3584if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() ==
ISD::AND &&
3585Op->isOnlyUserOf(Src.getNode()) &&
3586Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3588EVT SrcVT = Src.getValueType();
3595ISD::AND,
DL, SrcVT, {Src.getOperand(1), Mask})) {
3604// TODO: There are more binop opcodes that could be handled here - MIN, 3605// MAX, saturated math, etc. 3609if (Op0 == Op1 &&
Op->isOnlyUserOf(Op0.
getNode())) {
3610APInt UndefLHS, ZeroLHS;
3611if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3612Depth + 1,
/*AssumeSingleUse*/true))
3632APInt UndefRHS, ZeroRHS;
3633if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3636APInt UndefLHS, ZeroLHS;
3637if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3641 KnownZero = ZeroLHS & ZeroRHS;
3644// Attempt to avoid multi-use ops if we don't need anything from them. 3645// TODO - use KnownUndef to relax the demandedelts? 3647if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3659APInt UndefRHS, ZeroRHS;
3660if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3663APInt UndefLHS, ZeroLHS;
3664if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3668 KnownZero = ZeroLHS;
3669 KnownUndef = UndefLHS & UndefRHS;
// TODO: use getKnownUndefForVectorBinop? 3671// Attempt to avoid multi-use ops if we don't need anything from them. 3672// TODO - use KnownUndef to relax the demandedelts? 3674if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3685APInt SrcUndef, SrcZero;
3686if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3689// If we know that a demanded element was zero in Op1 we don't need to 3690// demand it in Op0 - its guaranteed to be zero. 3691APInt DemandedElts0 = DemandedElts & ~SrcZero;
3692if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3696 KnownUndef &= DemandedElts0;
3697 KnownZero &= DemandedElts0;
3699// If every element pair has a zero/undef then just fold to zero. 3700// fold (and x, undef) -> 0 / (and x, 0) -> 0 3701// fold (mul x, undef) -> 0 / (mul x, 0) -> 0 3702if (DemandedElts.
isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3705// If either side has a zero element, then the result element is zero, even 3706// if the other is an UNDEF. 3707// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros 3708// and then handle 'and' nodes with the rest of the binop opcodes. 3709 KnownZero |= SrcZero;
3710 KnownUndef &= SrcUndef;
3711 KnownUndef &= ~KnownZero;
3713// Attempt to avoid multi-use ops if we don't need anything from them. 3715if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3722if (SimplifyDemandedVectorElts(
Op.getOperand(0), DemandedElts, KnownUndef,
3723 KnownZero, TLO,
Depth + 1))
3727if (
SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3728Op.getOperand(0), DemandedElts, TLO.
DAG,
Depth + 1))
3732// zext(undef) upper bits are guaranteed to be zero. 3742if (SimplifyDemandedVectorElts(
Op.getOperand(0), DemandedElts, KnownUndef,
3743 KnownZero, TLO,
Depth + 1))
3745// Don't fall through to generic undef -> undef handling. 3749if (SimplifyDemandedVectorEltsForTargetNode(
Op, DemandedElts, KnownUndef,
3750 KnownZero, TLO,
Depth))
3755if (SimplifyDemandedBits(
Op,
DemandedBits, OriginalDemandedElts, Known,
3756 TLO,
Depth, AssumeSingleUse))
3762assert((KnownUndef & KnownZero) == 0 &&
"Elements flagged as undef AND zero");
3764// Constant fold all undef cases. 3765// TODO: Handle zero cases as well. 3772/// Determine which of the bits specified in Mask are known to be either zero or 3773/// one and return them in the Known. 3776constAPInt &DemandedElts,
3778unsignedDepth)
const{
3783"Should use MaskedValueIsZero if you don't know whether Op" 3784" is a target node!");
3791unsignedDepth)
const{
3797// The low bits are known zero if the pointer is aligned. 3803unsignedDepth)
const{
3807/// This method can be implemented by targets that want to expose additional 3808/// information about sign bits to the DAG Combiner. 3812unsignedDepth)
const{
3817"Should use ComputeNumSignBits if you don't know whether Op" 3818" is a target node!");
3835"Should use SimplifyDemandedVectorElts if you don't know whether Op" 3836" is a target node!");
3847"Should use SimplifyDemandedBits if you don't know whether Op" 3848" is a target node!");
3849 computeKnownBitsForTargetNode(
Op, Known, DemandedElts, TLO.
DAG,
Depth);
3861"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" 3862" is a target node!");
3895"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op" 3896" is a target node!");
3898// If Op can't create undef/poison and none of its operands are undef/poison 3899// then Op is never undef/poison. 3900return !canCreateUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG,
PoisonOnly,
3901/*ConsiderFlags*/true,
Depth) &&
3903 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3915"Should use canCreateUndefOrPoison if you don't know whether Op" 3916" is a target node!");
3917// Be conservative and return true. 3924unsignedDepth)
const{
3929"Should use isKnownNeverNaN if you don't know whether Op" 3930" is a target node!");
3935constAPInt &DemandedElts,
3938unsignedDepth)
const{
3943"Should use isSplatValue if you don't know whether Op" 3944" is a target node!");
3948// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must 3949// work with truncating build vectors and vectors with elements of less than 3958/*AllowTruncation=*/true)) {
3959 CVal = CN->getAPIntValue();
3960 EltWidth =
N.getValueType().getScalarSizeInBits();
3964// If this is a truncating splat, truncate the splat value. 3965// Otherwise, we may fail to match the expected values below. 3967 CVal = CVal.
trunc(EltWidth);
3991// Only interested in constant splats, we don't care about undef 3992// elements in identifying boolean constants and getConstantSplatNode 3993// returns NULL if all ops are undef; 4013// An extended value of 1 is always true, unless its original type is i1, 4014// in which case it will be sign extended to -1. 4015return (
N->isOne() && !SExt) || (SExt && (
N->getValueType(0) != MVT::i1));
4018returnN->isAllOnes() && SExt;
4023/// This helper function of SimplifySetCC tries to optimize the comparison when 4024/// either operand of the SetCC node is a bitwise-and instruction. 4027 DAGCombinerInfo &DCI)
const{
4037// (X & Y) != 0 --> zextOrTrunc(X & Y) 4038// iff everything but LSB is known zero: 4048// Try to eliminate a power-of-2 mask constant by converting to a signbit 4049// test in a narrow type that we can truncate to with no cost. Examples: 4050// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0 4051// (i32 X & 32768) != 0 --> (trunc X to i16) < 0 4052// TODO: This conservatively checks for type legality on the source and 4053// destination types. That may inhibit optimizations, but it also 4054// allows setcc->shift transforms that may be more beneficial. 4055auto *AndC = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
4056if (AndC &&
isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4059 AndC->getAPIntValue().getActiveBits());
4068// Match these patterns in any of their permutations: 4082// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if 4083// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as 4084// its liable to create and infinite loop. 4086if (isXAndYEqZeroPreferableToXAndYEqY(
Cond, OpVT) &&
4088// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. 4089// Note that where Y is variable and is known to have at most one bit set 4090// (for example, if it is Z & 1) we cannot do this; the expressions are not 4091// equivalent when Y == 0. 4094if (DCI.isBeforeLegalizeOps() ||
4098// If the target supports an 'and-not' or 'and-complement' logic operation, 4099// try to use that to make a comparison operation more efficient. 4100// But don't do this transform if the mask is a single bit because there are 4101// more efficient ways to deal with that case (for example, 'bt' on x86 or 4104// Bail out if the compare operand that we want to turn into a zero is 4105// already a zero (otherwise, infinite loop). 4109// Transform this into: ~X & Y == 0. 4118/// There are multiple IR patterns that could be checking whether certain 4119/// truncation of a signed number would be lossy or not. The pattern which is 4120/// best at IR level, may not lower optimally. Thus, we want to unfold it. 4121/// We are looking for the following pattern: (KeptBits is a constant) 4122/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) 4123/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false. 4124/// KeptBits also can't be 1, that would have been folded to %x dstcond 0 4125/// We will unfold it into the natural trunc+sext pattern: 4126/// ((%x << C) a>> C) dstcond %x 4127/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) 4128SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4131// We must be comparing with a constant. 4133if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4136// N0 should be: add %x, (1 << (KeptBits-1)) 4140// And we must be 'add'ing a constant. 4142if (!(C01 = dyn_cast<ConstantSDNode>(N0->
getOperand(1))))
4146EVT XVT =
X.getValueType();
4148// Validate constants ... 4157// But need to 'canonicalize' the constant. 4161// But need to 'canonicalize' the constant. 4170auto checkConstants = [&
I1, &I01]() ->
bool {
4171// Both of them must be power-of-two, and the constant from setcc is bigger. 4175if (checkConstants()) {
4176// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256 4178// What if we invert constants? (and the target predicate) 4183if (!checkConstants())
4185// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 4188// They are power-of-two, so which bit is set? 4189constunsigned KeptBits =
I1.logBase2();
4190constunsigned KeptBitsMinusOne = I01.
logBase2();
4193if (KeptBits != (KeptBitsMinusOne + 1))
4197// We don't want to do this in every single case. 4202// Unfold into: sext_inreg(%x) cond %x 4203// Where 'cond' will be either 'eq' or 'ne'. 4207return DAG.
getSetCC(
DL, SCCVT, SExtInReg,
X, NewCond);
4210// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 4211SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4213 DAGCombinerInfo &DCI,
constSDLoc &
DL)
const{
4215"Should be a comparison with 0.");
4217"Valid only for [in]equality comparisons.");
4219unsigned NewShiftOpcode;
4224// Look for '(C l>>/<< Y)'. 4226// The shift should be one-use. 4229unsigned OldShiftOpcode =
V.getOpcode();
4230switch (OldShiftOpcode) {
4238returnfalse;
// must be a logical shift. 4240// We should be shifting a constant. 4241// FIXME: best to use isConstantOrConstantVector(). 4252X, XC,
CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG);
4255// LHS of comparison should be an one-use 'and'. 4262// 'and' is commutative! 4269EVT VT =
X.getValueType();
4272// ((X 'OppositeShiftOpcode' Y) & C) Cond 0 4279/// Try to fold an equality comparison with a {add/sub/xor} binary operation as 4280/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to 4281/// handle the commuted versions of these patterns. 4284 DAGCombinerInfo &DCI)
const{
4290// (X + Y) == X --> Y == 0 4291// (X - Y) == X --> Y == 0 4292// (X ^ Y) == X --> Y == 0 4303// (X + Y) == Y --> X == 0 4304// (X ^ Y) == Y --> X == 0 4308// The shift would not be valid if the operands are boolean (i1). 4312// (X - Y) == Y --> X == Y << 1 4315if (!DCI.isCalledByLegalizer())
4316 DCI.AddToWorklist(YShl1.
getNode());
4324// Look through truncs that don't change the value of a ctpop. 4325// FIXME: Add vector support? Need to be careful with setcc result type below. 4331if (CTPOP.getOpcode() !=
ISD::CTPOP || !CTPOP.hasOneUse())
4334EVT CTVT = CTPOP.getValueType();
4335SDValue CTOp = CTPOP.getOperand(0);
4337// Expand a power-of-2-or-zero comparison based on ctpop: 4338// (ctpop x) u< 2 -> (x & x-1) == 0 4339// (ctpop x) u> 1 -> (x & x-1) != 0 4341// Keep the CTPOP if it is a cheap vector op. 4349returnSDValue();
// This is handled elsewhere. 4355for (
unsigned i = 0; i <
Passes; i++) {
4363// Expand a power-of-2 comparison based on ctpop 4365// Keep the CTPOP if it is cheap. 4374// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so 4375// check before emitting a potentially unnecessary op. 4377// (ctpop x) == 1 --> (x & x-1) == 0 4378// (ctpop x) != 1 --> (x & x-1) != 0 4384// (ctpop x) == 1 --> (x ^ x-1) > x-1 4385// (ctpop x) != 1 --> (x ^ x-1) <= x-1 4404auto getRotateSource = [](
SDValueX) {
4406returnX.getOperand(0);
4410// Peek through a rotated value compared against 0 or -1: 4411// (rot X, Y) == 0/-1 --> X == 0/-1 4412// (rot X, Y) != 0/-1 --> X != 0/-1 4413if (
SDValue R = getRotateSource(N0))
4416// Peek through an 'or' of a rotated value compared against 0: 4417// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0 4418// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0 4420// TODO: Add the 'and' with -1 sibling. 4421// TODO: Recurse through a series of 'or' ops to find the rotate. 4440// If we are testing for all-bits-clear, we might be able to do that with 4441// less shifting since bit-order does not matter. 4455if (!ShAmtC || ShAmtC->getAPIntValue().uge(
BitWidth))
4458// Canonicalize fshr as fshl to reduce pattern-matching. 4459unsigned ShAmt = ShAmtC->getZExtValue();
4463// Match an 'or' with a specific operand 'Other' in either commuted variant. 4468if (
Or.getOperand(0) ==
Other) {
4473if (
Or.getOperand(1) ==
Other) {
4485if (matchOr(F0, F1)) {
4486// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 4492if (matchOr(F1, F0)) {
4493// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 4503/// Try to simplify a setcc built with the specified operands and cc. If it is 4504/// unable to simplify it, return a null SDValue. 4508constSDLoc &dl)
const{
4514// Constant fold or commute setcc. 4518bool N0ConstOrSplat =
4520bool N1ConstOrSplat =
4523// Canonicalize toward having the constant on the RHS. 4524// TODO: Handle non-splat vector constants. All undef causes trouble. 4525// FIXME: We can't yet fold constant scalable vector splats, so avoid an 4526// infinite loop here when we encounter one. 4528if (N0ConstOrSplat && !N1ConstOrSplat &&
4531return DAG.
getSetCC(dl, VT, N1, N0, SwappedCC);
4533// If we have a subtract with the same 2 non-constant operands as this setcc 4534// -- but in reverse order -- then try to commute the operands of this setcc 4535// to match. A matching pair of setcc (cmp) and sub may be combined into 1 4536// instruction on some targets. 4537if (!N0ConstOrSplat && !N1ConstOrSplat &&
4542return DAG.
getSetCC(dl, VT, N1, N0, SwappedCC);
4551constAPInt &C1 = N1C->getAPIntValue();
4553// Optimize some CTPOP cases. 4557// For equality to 0 of a no-wrap multiply, decompose and test each op: 4558// X * Y == 0 --> (X == 0) || (Y == 0) 4559// X * Y != 0 --> (X != 0) && (Y != 0) 4560// TODO: This bails out if minsize is set, but if the target doesn't have a 4561// single instruction multiply for this type, it would likely be 4562// smaller to decompose. 4571return DAG.
getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4574// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an 4575// equality comparison, then we're just comparing whether X itself is 4584// (srl (ctlz x), 5) == 0 -> X != 0 4585// (srl (ctlz x), 5) != 1 -> X != 0 4588// (srl (ctlz x), 5) != 0 -> X == 0 4589// (srl (ctlz x), 5) == 1 -> X == 0 4600// FIXME: Support vectors. 4601if (
auto *N1C = dyn_cast<ConstantSDNode>(N1.
getNode())) {
4602constAPInt &C1 = N1C->getAPIntValue();
4604// (zext x) == C --> x == (trunc C) 4605// (sext x) == C --> x == (trunc C) 4616// DAGCombine turns costly ZExts into ANDs 4617if (
auto *
C = dyn_cast<ConstantSDNode>(N0->
getOperand(1)))
4618if ((
C->getAPIntValue()+1).isPowerOf2()) {
4619 MinBits =
C->getAPIntValue().countr_one();
4627 }
elseif (
auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4628// ZEXTLOAD / SEXTLOAD 4630 MinBits = LN0->getMemoryVT().getSizeInBits();
4634 MinBits = LN0->getMemoryVT().getSizeInBits();
4639// Figure out how many bits we need to preserve this constant. 4642// Make sure we're not losing bits from the constant. 4645 MinBits >= ReqdBits) {
4647if (isTypeDesirableForOp(
ISD::SETCC, MinVT)) {
4648// Will get folded away. 4650if (MinBits == 1 && C1 == 1)
4651// Invert the condition. 4658// If truncating the setcc operands is not desirable, we can still 4659// simplify the expression in some cases: 4660// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) 4661// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) 4662// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) 4663// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) 4664// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) 4665// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) 4669if (TopSetCC.
getValueType() == MVT::i1 && VT == MVT::i1 &&
4682 cast<CondCodeSDNode>(TopSetCC.
getOperand(2))->get(),
4691// If the LHS is '(and load, const)', the RHS is 0, the test is for 4692// equality or unsigned, and all 1 bits of the const are in the same 4693// partial word, see if we can shorten the load. 4701auto *Lod = cast<LoadSDNode>(N0.
getOperand(0));
4703unsigned bestWidth = 0, bestOffset = 0;
4704if (Lod->isSimple() && Lod->isUnindexed() &&
4705 (Lod->getMemoryVT().isByteSized() ||
4707unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4709unsigned maskWidth = origWidth;
4710// We can narrow (e.g.) 16-bit extending loads on 32-bit target to 4711// 8 bits, but have to be careful... 4713 origWidth = Lod->getMemoryVT().getSizeInBits();
4715// Only consider power-of-2 widths (and at least one byte) as candiates 4716// for the narrowed load. 4717for (
unsigned width = 8; width < origWidth; width *= 2) {
4722// Avoid accessing any padding here for now (we could use memWidth 4723// instead of origWidth here otherwise). 4724unsigned maxOffset = origWidth - width;
4725for (
unsigned offset = 0; offset <= maxOffset; offset += 8) {
4726if (Mask.isSubsetOf(newMask)) {
4732 *DAG.
getContext(), Layout, newVT, Lod->getAddressSpace(),
4733 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4735 bestOffset = ptrOffset / 8;
4736 bestMask = Mask.lshr(offset);
4754 Lod->getPointerInfo().getWithOffset(bestOffset),
4755 Lod->getOriginalAlign());
4763// If the LHS is a ZERO_EXTEND, perform the comparison on the input. 4767// If the comparison constant has bits in the upper part, the 4768// zero-extended value could never match. 4782// True if the sign bit of C1 is set. 4786// True if the sign bit of C1 isn't set. 4793// Otherwise, we can perform the comparison with the low bits. 4802// FIXME: Should use isNarrowingProfitable. 4817break;
// todo, be more careful with signed comparisons 4828// If the constant doesn't fit into the number of bits for the source of 4829// the sign extension, it is impossible for both sides to be equal. 4834 ExtDstTy != ExtSrcTy &&
"Unexpected types!");
4840// Otherwise, make this a use of a zext. 4843 }
elseif ((N1C->isZero() || N1C->isOne()) &&
4845// SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are 4846// excluded as they are handled below whilst checking for foldBooleans. 4856// Invert the condition. 4871// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We 4872// can only do this if the top bits are known zero. 4877// Okay, get the un-inverted input value. 4884// ((X^1)&1)^1 -> X & 1 4890return DAG.
getSetCC(dl, VT, Val, N1,
4893 }
elseif (N1C->isOne()) {
4903// Ensure that the input setccs return an i1 type or 0/1 value. 4909// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) 4915// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 4930 cast<VTSDNode>(Op0.
getOperand(1))->getVT() == MVT::i1)
4938// icmp eq/ne (urem %x, %y), 0 4939// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': 4949// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0 4950// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0 4961 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1,
Cond, DCI, dl))
4965// These simplifications apply to splat vectors as well. 4966// TODO: Handle more splat vector cases. 4968constAPInt &C1 = N1C->getAPIntValue();
4970APInt MinVal, MaxVal;
4980// Canonicalize GE/LE comparisons to use GT/LT comparisons. 4986if (!VT.
isVector()) {
// TODO: Support this for vectors. 4987// X >= C0 --> X > (C0 - 1) 4992 (!N1C->isOpaque() || (
C.getBitWidth() <= 64 &&
5006// X <= C0 --> X < (C0 + 1) 5007if (!VT.
isVector()) {
// TODO: Support this for vectors. 5012 (!N1C->isOpaque() || (
C.getBitWidth() <= 64 &&
5025// TODO: Support this for vectors after legalize ops. 5027// Canonicalize setlt X, Max --> setne X, Max 5031// If we have setult X, 1, turn it into seteq X, 0 5043// TODO: Support this for vectors after legalize ops. 5045// Canonicalize setgt X, Min --> setne X, Min 5049// If we have setugt X, Max-1, turn it into seteq X, Max 5058// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 5060if (
SDValueCC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5061 VT, N0, N1,
Cond, DCI, dl))
5064// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y). 5065// For example, when high 32-bits of i64 X are known clear: 5066// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 5067// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 5068bool CmpZero = N1C->isZero();
5069bool CmpNegOne = N1C->isAllOnes();
5070if ((CmpZero || CmpNegOne) && N0.
hasOneUse()) {
5071// Match or(lo,shl(hi,bw/2)) pattern. 5073unsigned EltBits = V.getScalarValueSizeInBits();
5074if (V.getOpcode() !=
ISD::OR || (EltBits % 2) != 0)
5079// Unshifted element must have zero upperbits. 5081 isa<ConstantSDNode>(
RHS.getOperand(1)) &&
5082RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5085Hi =
RHS.getOperand(0);
5089 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
5090LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5093Hi =
LHS.getOperand(0);
5101unsigned HalfBits = EltBits / 2;
5112if (IsConcat(N0,
Lo,
Hi))
5113return MergeConcat(
Lo,
Hi);
5126// If we have "setcc X, C0", check to see if we can shrink the immediate 5128// TODO: Support this for vectors after legalize ops. 5130// SETUGT X, SINTMAX -> SETLT X, 0 5131// SETUGE X, SINTMIN -> SETLT X, 0 5138// SETULT X, SINTMIN -> SETGT X, -1 5139// SETULE X, SINTMAX -> SETGT X, -1 5148// Back to non-vector simplifications. 5149// TODO: Can we do these for vector splats? 5150if (
auto *N1C = dyn_cast<ConstantSDNode>(N1.
getNode())) {
5151constAPInt &C1 = N1C->getAPIntValue();
5154// Fold bit comparisons when we can. This will result in an 5155// incorrect value when boolean false is negative one, unless 5156// the bitsize is 1 in which case the false value is the same 5157// in practice regardless of the representation. 5163if (
auto *AndRHS = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
5164if (
Cond ==
ISD::SETNE && C1 == 0) {
// (X & 8) != 0 --> (X & 8) >> 3 5165// Perform the xform if the AND RHS is a single bit. 5166unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5167if (AndRHS->getAPIntValue().isPowerOf2() &&
5175// (X & 8) == 8 --> (X & 8) >> 3 5176// Perform the xform if C1 is a single bit. 5190// (X & -256) == 256 -> (X >> 8) == 1 5193if (
auto *AndRHS = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
5194constAPInt &AndRHSC = AndRHS->getAPIntValue();
5209// X < 0x100000000 -> (X >> 32) < 1 5210// X >= 0x100000000 -> (X >> 32) >= 1 5211// X <= 0x0ffffffff -> (X >> 32) < 1 5212// X > 0x0ffffffff -> (X >> 32) >= 1 5231return DAG.
getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5237if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5238auto *CFP = cast<ConstantFPSDNode>(N1);
5239assert(!CFP->getValueAPF().isNaN() &&
"Unexpected NaN value");
5241// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the 5242// constant if knowing that the operand is non-nan is enough. We prefer to 5243// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to 5248// setcc (fneg x), C -> setcc swap(pred) x, -C 5258// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf 5260 !
isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5273// If the condition is not legal, see if we can find an equivalent one 5276// If the comparison was an awkward floating-point == or != and one of 5277// the comparison operands is infinity or negative infinity, convert the 5278// condition to a less-awkward <= or >=. 5279if (CFP->getValueAPF().isInfinity()) {
5280bool IsNegInf = CFP->getValueAPF().isNegative();
5291return DAG.
getSetCC(dl, VT, N0, N1, NewCond);
5297// The sext(setcc()) => setcc() optimization relies on the appropriate 5298// constant being emitted. 5300"Integer types should be handled by FoldSetCC");
5304if (UOF == 2)
// FP operators that are undefined on NaNs. 5306if (UOF ==
unsigned(EqTrue))
5308// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO 5309// if it is not already. 5311if (NewCond !=
Cond &&
5314return DAG.
getSetCC(dl, VT, N0, N1, NewCond);
5321if ((isSignedIntSetCC(
Cond) || isUnsignedIntSetCC(
Cond)) &&
5339// Simplify (X+Y) == (X+Z) --> Y == Z 5346// If X op Y == Y op X, try other combinations. 5356// If RHS is a legal immediate value for a compare instruction, we need 5357// to be careful about increasing register pressure needlessly. 5358bool LegalRHSImm =
false;
5360if (
auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5361if (
auto *LHSR = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
5362// Turn (X+C1) == C2 --> X == C2-C1 5366 DAG.
getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5370// Turn (X^C1) == C2 --> X == C1^C2 5374 DAG.
getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5379// Turn (C1-X) == C2 --> X == C1-C2 5380if (
auto *SUBC = dyn_cast<ConstantSDNode>(N0.
getOperand(0)))
5384 DAG.
getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5388// Could RHSC fold directly into a compare? 5389if (RHSC->getValueType(0).getSizeInBits() <= 64)
5393// (X+Y) == X --> Y == 0 and similar folds. 5394// Don't do this if X is an immediate that can fold into a cmp 5395// instruction and X+Y has other uses. It could be an induction variable 5396// chain, and the transform would increase register pressure. 5398if (
SDValue V = foldSetCCWithBinOp(VT, N0, N1,
Cond, dl, DCI))
5404if (
SDValue V = foldSetCCWithBinOp(VT, N1, N0,
Cond, dl, DCI))
5407if (
SDValue V = foldSetCCWithAnd(VT, N0, N1,
Cond, dl, DCI))
5411// Fold remainder of division by a constant. 5414// When division is cheap or optimizing for minimum size, 5415// fall through to DIVREM creation by skipping this fold. 5418if (
SDValue Folded = buildUREMEqFold(VT, N0, N1,
Cond, DCI, dl))
5421if (
SDValue Folded = buildSREMEqFold(VT, N0, N1,
Cond, DCI, dl))
5427// Fold away ALL boolean setcc's. 5434 N0 = DAG.
getNOT(dl, Temp, OpVT);
5441caseISD::SETGT:
// X >s Y --> X == 0 & Y == 1 --> ~X & Y 5442caseISD::SETULT:
// X <u Y --> X == 0 & Y == 1 --> ~X & Y 5443 Temp = DAG.
getNOT(dl, N0, OpVT);
5448caseISD::SETLT:
// X <s Y --> X == 1 & Y == 0 --> ~Y & X 5449caseISD::SETUGT:
// X >u Y --> X == 1 & Y == 0 --> ~Y & X 5450 Temp = DAG.
getNOT(dl, N1, OpVT);
5455caseISD::SETULE:
// X <=u Y --> X == 0 | Y == 1 --> ~X | Y 5456caseISD::SETGE:
// X >=s Y --> X == 0 | Y == 1 --> ~X | Y 5457 Temp = DAG.
getNOT(dl, N0, OpVT);
5462caseISD::SETUGE:
// X >=u Y --> X == 1 | Y == 0 --> ~Y | X 5463caseISD::SETLE:
// X <=s Y --> X == 1 | Y == 0 --> ~Y | X 5464 Temp = DAG.
getNOT(dl, N1, OpVT);
5471// FIXME: If running after legalize, we probably can't do this. 5473 N0 = DAG.
getNode(ExtendCode, dl, VT, N0);
5478// Could not fold it. 5482/// Returns true (and the GlobalValue and the offset) if the node is a 5483/// GlobalAddress + offset. 5489if (
auto *GASD = dyn_cast<GlobalAddressSDNode>(
N)) {
5490 GA = GASD->getGlobal();
5491Offset += GASD->getOffset();
5499if (
auto *V = dyn_cast<ConstantSDNode>(N2)) {
5500Offset += V->getSExtValue();
5504if (
auto *V = dyn_cast<ConstantSDNode>(N1)) {
5505Offset += V->getSExtValue();
5516// Default implementation: no optimization. 5520//===----------------------------------------------------------------------===// 5521// Inline Assembler Implementation Methods 5522//===----------------------------------------------------------------------===// 5526unsigned S = Constraint.
size();
5529switch (Constraint[0]) {
5532return C_RegisterClass;
5534case'o':
// offsetable 5535case'V':
// not offsetable 5539case'n':
// Simple Integer 5540case'E':
// Floating Point Constant 5541case'F':
// Floating Point Constant 5543case'i':
// Simple Integer or Relocatable Constant 5544case's':
// Relocatable Constant 5545case'X':
// Allow ANY value. 5546case'I':
// Target registers. 5560if (S > 1 && Constraint[0] ==
'{' && Constraint[S - 1] ==
'}') {
5561if (S == 8 && Constraint.
substr(1, 6) ==
"memory")
// "{memory}" 5568/// Try to replace an X constraint, which matches anything, with another that 5569/// has more specific requirements based on the type of the corresponding 5575return"f";
// works for many targets 5585/// Lower the specified operand into the Ops vector. 5586/// If it is invalid, don't add anything to Ops. 5589 std::vector<SDValue> &Ops,
5592if (Constraint.
size() > 1)
5595char ConstraintLetter = Constraint[0];
5596switch (ConstraintLetter) {
5598case'X':
// Allows any operand 5599case'i':
// Simple Integer or Relocatable Constant 5600case'n':
// Simple Integer 5601case's': {
// Relocatable Constant 5606// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C), 5607// etc., since getelementpointer is variadic. We can't use 5608// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible 5609// while in this case the GA may be furthest from the root node which is 5610// likely an ISD::ADD. 5612if ((
C = dyn_cast<ConstantSDNode>(
Op)) && ConstraintLetter !=
's') {
5613// gcc prints these as sign extended. Sign extend value to 64 bits 5614// now; without this it would get ZExt'd later in 5615// ScheduleDAGSDNodes::EmitNode, which is very generic. 5616bool IsBool =
C->getConstantIntValue()->getBitWidth() == 1;
5626if (ConstraintLetter !=
'n') {
5627if (
constauto *GA = dyn_cast<GlobalAddressSDNode>(
Op)) {
5629 GA->getValueType(0),
5630Offset + GA->getOffset()));
5633if (
constauto *BA = dyn_cast<BlockAddressSDNode>(
Op)) {
5635 BA->getBlockAddress(), BA->getValueType(0),
5636Offset + BA->getOffset(), BA->getTargetFlags()));
5639if (isa<BasicBlockSDNode>(
Op)) {
5644constunsigned OpCode =
Op.getOpcode();
5646if ((
C = dyn_cast<ConstantSDNode>(
Op.getOperand(0))))
5647Op =
Op.getOperand(1);
5648// Subtraction is not commutative. 5650 (
C = dyn_cast<ConstantSDNode>(
Op.getOperand(1))))
5651Op =
Op.getOperand(0);
5668std::pair<unsigned, const TargetRegisterClass *>
5674assert(*(Constraint.
end() - 1) ==
'}' &&
"Not a brace enclosed constraint?");
5676// Remove the braces from around the name. 5679 std::pair<unsigned, const TargetRegisterClass *> R =
5682// Figure out which register class contains this reg. 5684// If none of the value types for this register class are valid, we 5685// can't use it. For example, 64-bit reg classes on 32-bit targets. 5691 std::pair<unsigned, const TargetRegisterClass *> S =
5692 std::make_pair(PR, RC);
5694// If this register class has the requested value type, return it, 5695// otherwise keep searching and return the first class found 5696// if no other is found which explicitly has the requested type. 5708//===----------------------------------------------------------------------===// 5709// Constraint Selection. 5711/// Return true of this is an input operand that is a matching constraint like 5714assert(!ConstraintCode.empty() &&
"No known constraint!");
5715return isdigit(
static_cast<unsignedchar>(ConstraintCode[0]));
5718/// If this is an input matching constraint, this method returns the output 5719/// operand it matches. 5721assert(!ConstraintCode.empty() &&
"No known constraint!");
5722return atoi(ConstraintCode.c_str());
5725/// Split up the constraint string from the inline assembly value into the 5726/// specific constraints and their prefixes, and also tie in the associated 5728/// If this returns an empty vector, and if the constraint string itself 5729/// isn't empty, there was an error parsing. 5734 /// Information about all of the constraints. 5736constInlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5737unsigned maCount = 0;
// Largest number of multiple alternative constraints. 5739// Do a prepass over the constraints, canonicalizing them, and building up the 5740// ConstraintOperands list. 5741unsigned ArgNo = 0;
// ArgNo - The argument of the CallInst. 5742unsigned ResNo = 0;
// ResNo - The result number of the next output. 5743unsigned LabelNo = 0;
// LabelNo - CallBr indirect dest number. 5746 ConstraintOperands.emplace_back(std::move(CI));
5749// Update multiple alternative constraint count. 5755// Compute the value type for each operand. 5756switch (OpInfo.
Type) {
5758// Indirect outputs just consume an argument. 5764// The return value of the call is this value. As such, there is no 5765// corresponding argument. 5766assert(!Call.getType()->isVoidTy() &&
"Bad inline asm!");
5767if (
auto *STy = dyn_cast<StructType>(Call.getType())) {
5772assert(ResNo == 0 &&
"Asm only has one result!");
5782 OpInfo.
CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5793 OpTy = Call.getParamElementType(ArgNo);
5794assert(OpTy &&
"Indirect operand must have elementtype attribute");
5797// Look for vector wrapped in a struct. e.g. { <16 x i8> }. 5798if (
StructType *STy = dyn_cast<StructType>(OpTy))
5799if (STy->getNumElements() == 1)
5800 OpTy = STy->getElementType(0);
5802// If OpTy is not a single value, it may be a struct/union that we 5803// can tile with integers. 5805unsigned BitSize =
DL.getTypeSizeInBits(OpTy);
5825// If we have multiple alternative constraints, select the best alternative. 5826if (!ConstraintOperands.empty()) {
5828unsigned bestMAIndex = 0;
5830// weight: -1 = invalid match, and 0 = so-so match to 5 = good match. 5833// Compute the sums of the weights for each alternative, keeping track 5834// of the best (highest weight) one so far. 5835for (maIndex = 0; maIndex < maCount; ++maIndex) {
5837for (
unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5838 cIndex != eIndex; ++cIndex) {
5843// If this is an output operand with a matching input operand, 5844// look up the matching input. If their types mismatch, e.g. one 5845// is an integer, the other is floating point, or their sizes are 5846// different, flag it as an maCantMatch. 5854 weightSum = -1;
// Can't match. 5859 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5864 weightSum += weight;
5867if (weightSum > bestWeight) {
5868 bestWeight = weightSum;
5869 bestMAIndex = maIndex;
5873// Now select chosen alternative in each constraint. 5876 cInfo.selectAlternative(bestMAIndex);
5880// Check and hook up tied operands, choose constraint code to use. 5881for (
unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5882 cIndex != eIndex; ++cIndex) {
5885// If this is an output operand with a matching input operand, look up the 5886// matching input. If their types mismatch, e.g. one is an integer, the 5887// other is floating point, or their sizes are different, flag it as an 5893 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5896 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5903if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
5904 (MatchRC.second != InputRC.second)) {
5906" with a matching output constraint of" 5907" incompatible type!");
5913return ConstraintOperands;
5916/// Return a number indicating our preference for chosing a type of constraint 5917/// over another, for the purpose of sorting them. Immediates are almost always 5918/// preferrable (when they can be emitted). A higher return value means a 5919/// stronger preference for one constraint type relative to another. 5920/// FIXME: We should prefer registers over memory but doing so may lead to 5921/// unrecoverable register exhaustion later. 5922/// https://github.com/llvm/llvm-project/issues/20571 5941/// Examine constraint type and operand type and determine a weight value. 5942/// This object must already have been set up with the operand type 5943/// and the current alternative constraint selected. 5948if (maIndex >= (
int)
info.multipleAlternatives.size())
5949 rCodes = &
info.Codes;
5951 rCodes = &
info.multipleAlternatives[maIndex].Codes;
5954// Loop over the options, keeping track of the most general one. 5955for (
const std::string &rCode : *rCodes) {
5957 getSingleConstraintMatchWeight(
info, rCode.c_str());
5958if (weight > BestWeight)
5959 BestWeight = weight;
5965/// Examine constraint type and operand type and determine a weight value. 5966/// This object must already have been set up with the operand type 5967/// and the current alternative constraint selected. 5972Value *CallOperandVal =
info.CallOperandVal;
5973// If we don't have a value, we can't do a match, 5974// but allow it at the lowest weight. 5977// Look at the constraint type. 5978switch (*constraint) {
5979case'i':
// immediate integer. 5980case'n':
// immediate integer with a known value. 5981if (isa<ConstantInt>(CallOperandVal))
5982 weight = CW_Constant;
5984case's':
// non-explicit intregal immediate. 5985if (isa<GlobalValue>(CallOperandVal))
5986 weight = CW_Constant;
5988case'E':
// immediate float if host format. 5989case'F':
// immediate float. 5990if (isa<ConstantFP>(CallOperandVal))
5991 weight = CW_Constant;
5993case'<':
// memory operand with autodecrement. 5994case'>':
// memory operand with autoincrement. 5995case'm':
// memory operand. 5996case'o':
// offsettable memory operand 5997case'V':
// non-offsettable memory operand 6000case'r':
// general register. 6001case'g':
// general register, memory operand or immediate integer. 6002// note: Clang converts "g" to "imr". 6004 weight = CW_Register;
6006case'X':
// any operand. 6008 weight = CW_Default;
6014/// If there are multiple different constraints that we could pick for this 6015/// operand (e.g. "imr") try to pick the 'best' one. 6016/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall 6017/// into seven classes: 6018/// Register -> one specific register 6019/// RegisterClass -> a group of regs 6021/// Address -> a symbolic memory reference 6022/// Immediate -> immediate values 6023/// Other -> magic values (such as "Flag Output Operands") 6024/// Unknown -> something we don't recognize yet and can't handle 6025/// Ideally, we would pick the most specific constraint possible: if we have 6026/// something that fits into a register, we would pick it. The problem here 6027/// is that if we have something that could either be in a register or in 6028/// memory that use of the register could cause selection of *other* 6029/// operands to fail: they might only succeed if we pick memory. Because of 6030/// this the heuristic we use is: 6032/// 1) If there is an 'other' constraint, and if the operand is valid for 6033/// that constraint, use it. This makes us take advantage of 'i' 6034/// constraints when available. 6035/// 2) Otherwise, pick the most general constraint present. This prefers 6036/// 'm' over 'r', for example. 6042 Ret.reserve(OpInfo.
Codes.size());
6046// Indirect 'other' or 'immediate' constraints are not allowed. 6052// Things with matching constraints can only be registers, per gcc 6053// documentation. This mainly affects "g" constraints. 6057 Ret.emplace_back(Code, CType);
6062 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6068/// If we have an immediate, see if we can lower it. Return true if we can, 6076"need immediate or other");
6081 std::vector<SDValue> ResultOps;
6083return !ResultOps.empty();
6086/// Determines the constraint code and constraint type to use for the specific 6087/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. 6091assert(!OpInfo.
Codes.empty() &&
"Must have at least one constraint");
6093// Single-letter constraints ('r') are very common. 6094if (OpInfo.
Codes.size() == 1) {
6102unsigned BestIdx = 0;
6103for (
constunsigned E =
G.size();
6109// If we're out of constraints, just pick the first one. 6110if (BestIdx + 1 == E) {
6120// 'X' matches anything. 6122// Constants are handled elsewhere. For Functions, the type here is the 6123// type of the result, which is not what we want to look at; leave them 6126if (isa<ConstantInt>(v) || isa<Function>(v)) {
6130if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6135// Otherwise, try to resolve it to something we know about by looking at 6136// the actual operand type. 6137if (
constchar *Repl = LowerXConstraint(OpInfo.
ConstraintVT)) {
6144/// Given an exact SDIV by a constant, create a multiplication 6145/// with the multiplicative inverse of the constant. 6146/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242 6152EVT VT =
N->getValueType(0);
6163APInt Divisor =
C->getAPIntValue();
6175// Collect all magic values from the build vector. 6185"Expected matchUnaryPredicate to return one element for scalable " 6190assert(isa<ConstantSDNode>(Op1) &&
"Expected a constant");
6192 Factor = Factors[0];
6204/// Given an exact UDIV by a constant, create a multiplication 6205/// with the multiplicative inverse of the constant. 6206/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242 6210EVT VT =
N->getValueType(0);
6221APInt Divisor =
C->getAPIntValue();
6227// Calculate the multiplicative inverse modulo BW. 6236// Collect all magic values from the build vector. 6246"Expected matchUnaryPredicate to return one element for scalable " 6251assert(isa<ConstantSDNode>(Op1) &&
"Expected a constant");
6253 Factor = Factors[0];
6270returnSDValue(
N, 0);
// Lower SDIV as SDIV 6280returnSDValue(
N, 0);
// Lower SREM as SREM 6284/// Build sdiv by power-of-2 with conditional move instructions 6285/// Ref: "Hacker's Delight" by Henry Warren 10-1 6286/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into: 6291/// neg res, res (when the divisor is negative) 6296EVT VT =
N->getValueType(0);
6304// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right. 6318// If we're dividing by a positive value, we're done. Otherwise, we must 6319// negate the result. 6327/// Given an ISD::SDIV node expressing a divide by constant, 6328/// return a DAG expression to select that will generate the same value by 6329/// multiplying by a magic number. 6330/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 6332bool IsAfterLegalization,
6333bool IsAfterLegalTypes,
6336EVT VT =
N->getValueType(0);
6343// Check to see if we can do this. 6344// FIXME: We should be more aggressive here. 6346// Limit this to simple scalars for now. 6350// If this type will be promoted to a large enough type with a legal 6351// multiply operation, we can go ahead and do this transform. 6361// If the sdiv has an 'exact' bit we can use a simpler lowering. 6362if (
N->getFlags().hasExact())
6371constAPInt &Divisor =
C->getAPIntValue();
6373int NumeratorFactor = 0;
6377// If d is +1/-1, we just multiply the numerator by +1/-1. 6383// If d > 0 and m < 0, add the numerator. 6384 NumeratorFactor = 1;
6386// If d < 0 and m > 0, subtract the numerator. 6387 NumeratorFactor = -1;
6400// Collect the shifts / magic values from each element. 6404SDValue MagicFactor, Factor, Shift, ShiftMask;
6412 Shifts.
size() == 1 && ShiftMasks.
size() == 1 &&
6413"Expected matchUnaryPredicate to return one element for scalable " 6420assert(isa<ConstantSDNode>(N1) &&
"Expected a constant");
6421 MagicFactor = MagicFactors[0];
6422 Factor = Factors[0];
6424 ShiftMask = ShiftMasks[0];
6427// Multiply the numerator (operand 0) by the magic value. 6428// FIXME: We should support doing a MUL in a wider type. 6430// If the type isn't legal, use a wider mul of the type calculated 6448// If type twice as wide legal, widen and use a mul plus a shift. 6454// Some targets like AMDGPU try to go from SDIV to SDIVREM which is then 6455// custom lowered. This is very expensive so avoid it at all costs for 6456// constant divisors. 6470SDValue Q = GetMULHS(N0, MagicFactor);
6476// (Optionally) Add/subtract the numerator using Factor. 6482// Shift right algebraic by shift value. 6486// Extract the sign bit, mask it and add it to the quotient. 6495/// Given an ISD::UDIV node expressing a divide by constant, 6496/// return a DAG expression to select that will generate the same value by 6497/// multiplying by a magic number. 6498/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 6500bool IsAfterLegalization,
6501bool IsAfterLegalTypes,
6504EVT VT =
N->getValueType(0);
6511// Check to see if we can do this. 6512// FIXME: We should be more aggressive here. 6514// Limit this to simple scalars for now. 6518// If this type will be promoted to a large enough type with a legal 6519// multiply operation, we can go ahead and do this transform. 6529// If the udiv has an 'exact' bit we can use a simpler lowering. 6530if (
N->getFlags().hasExact())
6536// Try to use leading zeros of the dividend to reduce the multiplier and 6537// avoid expensive fixups. 6540bool UseNPQ =
false, UsePreShift =
false, UsePostShift =
false;
6546constAPInt& Divisor =
C->getAPIntValue();
6548SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6550// Magic algorithm doesn't work for division by 1. We need to emit a select 6552if (Divisor.
isOne()) {
6553 PreShift = PostShift = DAG.
getUNDEF(ShSVT);
6554 MagicFactor = NPQFactor = DAG.
getUNDEF(SVT);
6558 Divisor, std::min(KnownLeadingZeros, Divisor.
countl_zero()));
6563"We shouldn't generate an undefined shift!");
6565"We shouldn't generate an undefined shift!");
6567"Unexpected pre-shift");
6574 UseNPQ |= magics.
IsAdd;
6575 UsePreShift |= magics.
PreShift != 0;
6586// Collect the shifts/magic values from each element. 6590SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6598 NPQFactors.
size() == 1 && PostShifts.
size() == 1 &&
6599"Expected matchUnaryPredicate to return one for scalable vectors");
6605assert(isa<ConstantSDNode>(N1) &&
"Expected a constant");
6606 PreShift = PreShifts[0];
6607 MagicFactor = MagicFactors[0];
6608 PostShift = PostShifts[0];
6617// FIXME: We should support doing a MUL in a wider type. 6619// If the type isn't legal, use a wider mul of the type calculated 6637// If type twice as wide legal, widen and use a mul plus a shift. 6643// Some targets like AMDGPU try to go from UDIV to UDIVREM which is then 6644// custom lowered. This is very expensive so avoid it at all costs for 6645// constant divisors. 6656returnSDValue();
// No mulhu or equivalent 6659// Multiply the numerator (operand 0) by the magic value. 6660 Q = GetMULHU(Q, MagicFactor);
6670// For vectors we might have a mix of non-NPQ/NPQ paths, so use 6671// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero. 6673 NPQ = GetMULHU(NPQ, NPQFactor);
6692return DAG.
getSelect(dl, VT, IsOne, N0, Q);
6695/// If all values in Values that *don't* match the predicate are same 'splat' 6696/// value, then replace all values with that splat value. 6697/// Else, if AlternativeReplacement was provided, then replace all values that 6698/// do match predicate with AlternativeReplacement value. 6704// Is there a value for which the Predicate does *NOT* match? What is it? 6706if (SplatValue != Values.
end()) {
6707// Does Values consist only of SplatValue's and values matching Predicate? 6710 }))
// Then we shall replace values matching predicate with SplatValue. 6711 Replacement = *SplatValue;
6714// Oops, we did not find the "baseline" splat value. 6715if (!AlternativeReplacement)
6716return;
// Nothing to do. 6717// Let's replace with provided value then. 6718 Replacement = AlternativeReplacement;
6723/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE 6724/// where the divisor is constant and the comparison target is zero, 6725/// return a DAG expression that will generate the same comparison result 6726/// using only multiplications, additions and shifts/rotations. 6727/// Ref: "Hacker's Delight" 10-17. 6731 DAGCombinerInfo &DCI,
6734if (
SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode,
Cond,
6737 DCI.AddToWorklist(
N);
6745TargetLowering::prepareUREMEqFold(
EVT SETCCVT,
SDValue REMNode,
6747 DAGCombinerInfo &DCI,
constSDLoc &
DL,
6749// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) 6750// - D must be constant, with D = D0 * 2^K where D0 is odd 6751// - P is the multiplicative inverse of D0 modulo 2^W 6752// - Q = floor(((2^W) - 1) / D) 6753// where W is the width of the common type of N and D. 6755"Only applicable for (in)equality comparisons.");
6764// If MUL is unavailable, we cannot proceed in any case. 6768bool ComparingWithAllZeros =
true;
6769bool AllComparisonsWithNonZerosAreTautological =
true;
6770bool HadTautologicalLanes =
false;
6771bool AllLanesAreTautological =
true;
6772bool HadEvenDivisor =
false;
6773bool AllDivisorsArePowerOfTwo =
true;
6774bool HadTautologicalInvertedLanes =
false;
6778// Division by 0 is UB. Leave it to be constant-folded elsewhere. 6783constAPInt &
Cmp = CCmp->getAPIntValue();
6785 ComparingWithAllZeros &=
Cmp.isZero();
6787// x u% C1` is *always* less than C1. So given `x u% C1 == C2`, 6788// if C2 is not less than C1, the comparison is always false. 6789// But we will only be able to produce the comparison that will give the 6790// opposive tautological answer. So this lane would need to be fixed up. 6791bool TautologicalInvertedLane =
D.ule(Cmp);
6792 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6794// If all lanes are tautological (either all divisors are ones, or divisor 6795// is not greater than the constant we are comparing with), 6796// we will prefer to avoid the fold. 6797bool TautologicalLane =
D.isOne() || TautologicalInvertedLane;
6798 HadTautologicalLanes |= TautologicalLane;
6799 AllLanesAreTautological &= TautologicalLane;
6801// If we are comparing with non-zero, we need'll need to subtract said 6802// comparison value from the LHS. But there is no point in doing that if 6803// every lane where we are comparing with non-zero is tautological.. 6805 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6807// Decompose D into D0 * 2^K 6808unsignedK =
D.countr_zero();
6809assert((!
D.isOne() || (K == 0)) &&
"For divisor '1' we won't rotate.");
6812// D is even if it has trailing zeros. 6813 HadEvenDivisor |= (
K != 0);
6814// D is a power-of-two if D0 is one. 6815// If all divisors are power-of-two, we will prefer to avoid the fold. 6816 AllDivisorsArePowerOfTwo &= D0.
isOne();
6819// 2^W requires W + 1 bits, so we have to extend and then truncate. 6820unsignedW =
D.getBitWidth();
6822assert((D0 *
P).isOne() &&
"Multiplicative inverse basic check failed.");
6824// Q = floor((2^W - 1) u/ D) 6825// R = ((2^W - 1) u% D) 6829// If we are comparing with zero, then that comparison constant is okay, 6830// else it may need to be one less than that. 6835"We are expecting that K is always less than all-ones for ShSVT");
6837// If the lane is tautological the result can be constant-folded. 6838if (TautologicalLane) {
6839// Set P and K amount to a bogus values so we can try to splat them. 6842// And ensure that comparison constant is tautological, 6843// it will always compare true/false. 6850/*implicitTrunc=*/true),
6859// Collect the values from each element. 6863// If all lanes are tautological, the result can be constant-folded. 6864if (AllLanesAreTautological)
6867// If this is a urem by a powers-of-two, avoid the fold since it can be 6868// best implemented as a bit test. 6869if (AllDivisorsArePowerOfTwo)
6874if (HadTautologicalLanes) {
6875// Try to turn PAmts into a splat, since we don't care about the values 6876// that are currently '0'. If we can't, just keep '0'`s. 6878// Try to turn KAmts into a splat, since we don't care about the values 6879// that are currently '-1'. If we can't, change them to '0'`s. 6889"Expected matchBinaryPredicate to return one element for " 6900if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6902returnSDValue();
// FIXME: Could/should use `ISD::ADD`? 6904"Expecting that the types on LHS and RHS of comparisons match.");
6912// Rotate right only if any divisor was even. We avoid rotates for all-odd 6913// divisors as a performance improvement, since rotating by 0 is a no-op. 6914if (HadEvenDivisor) {
6915// We need ROTR to do this. 6918// UREM: (rotr (mul N, P), K) 6923// UREM: (setule/setugt (rotr (mul N, P), K), Q) 6927if (!HadTautologicalInvertedLanes)
6930// If any lanes previously compared always-false, the NewCC will give 6931// always-true result for them, so we need to fixup those lanes. 6932// Or the other way around for inequality predicate. 6936// x u% C1` is *always* less than C1. So given `x u% C1 == C2`, 6937// if C2 is not less than C1, the comparison is always false. 6938// But we have produced the comparison that will give the 6939// opposive tautological answer. So these lanes would need to be fixed up. 6940SDValue TautologicalInvertedChannels =
6944// NOTE: we avoid letting illegal types through even if we're before legalize 6945// ops – legalization has a hard time producing good code for this. 6947// If we have a vector select, let's replace the comparison results in the 6948// affected lanes with the correct tautological result. 6950DL, SETCCVT, SETCCVT);
6952 Replacement, NewCC);
6955// Else, we can just invert the comparison result in the appropriate lanes. 6957// NOTE: see the note above VSELECT above. 6960 TautologicalInvertedChannels);
6962returnSDValue();
// Don't know how to lower. 6965/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE 6966/// where the divisor is constant and the comparison target is zero, 6967/// return a DAG expression that will generate the same comparison result 6968/// using only multiplications, additions and shifts/rotations. 6969/// Ref: "Hacker's Delight" 10-17. 6973 DAGCombinerInfo &DCI,
6976if (
SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode,
Cond,
6978assert(Built.
size() <= 7 &&
"Max size prediction failed.");
6980 DCI.AddToWorklist(
N);
6988TargetLowering::prepareSREMEqFold(
EVT SETCCVT,
SDValue REMNode,
6990 DAGCombinerInfo &DCI,
constSDLoc &
DL,
6992// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17. 6994// (seteq/ne (srem N, D), 0) 6996// (setule/ugt (rotr (add (mul N, P), A), K), Q) 6998// - D must be constant, with D = D0 * 2^K where D0 is odd 6999// - P is the multiplicative inverse of D0 modulo 2^W 7000// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k))) 7001// - Q = floor((2 * A) / (2^K)) 7002// where W is the width of the common type of N and D. 7004// When D is a power of two (and thus D0 is 1), the normal 7005// formula for A and Q don't apply, because the derivation 7006// depends on D not dividing 2^(W-1), and thus theorem ZRS 7007// does not apply. This specifically fails when N = INT_MIN. 7009// Instead, for power-of-two D, we use: 7011// |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1]) 7013// |-> Test that the top K bits are zero after rotation 7015"Only applicable for (in)equality comparisons.");
7024// If we are after ops legalization, and MUL is unavailable, we can not 7029// TODO: Could support comparing with non-zero too. 7031if (!CompTarget || !CompTarget->
isZero())
7034bool HadIntMinDivisor =
false;
7035bool HadOneDivisor =
false;
7036bool AllDivisorsAreOnes =
true;
7037bool HadEvenDivisor =
false;
7038bool NeedToApplyOffset =
false;
7039bool AllDivisorsArePowerOfTwo =
true;
7043// Division by 0 is UB. Leave it to be constant-folded elsewhere. 7047// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. 7049// WARNING: this fold is only valid for positive divisors! 7052D.negate();
// `rem %X, -C` is equivalent to `rem %X, C` 7054 HadIntMinDivisor |=
D.isMinSignedValue();
7056// If all divisors are ones, we will prefer to avoid the fold. 7057 HadOneDivisor |=
D.isOne();
7058 AllDivisorsAreOnes &=
D.isOne();
7060// Decompose D into D0 * 2^K 7061unsignedK =
D.countr_zero();
7062assert((!
D.isOne() || (K == 0)) &&
"For divisor '1' we won't rotate.");
7065if (!
D.isMinSignedValue()) {
7066// D is even if it has trailing zeros; unless it's INT_MIN, in which case 7067// we don't care about this lane in this fold, we'll special-handle it. 7068 HadEvenDivisor |= (
K != 0);
7071// D is a power-of-two if D0 is one. This includes INT_MIN. 7072// If all divisors are power-of-two, we will prefer to avoid the fold. 7073 AllDivisorsArePowerOfTwo &= D0.
isOne();
7076// 2^W requires W + 1 bits, so we have to extend and then truncate. 7077unsignedW =
D.getBitWidth();
7079assert((D0 *
P).isOne() &&
"Multiplicative inverse basic check failed.");
7081// A = floor((2^(W - 1) - 1) / D0) & -2^K 7085if (!
D.isMinSignedValue()) {
7086// If divisor INT_MIN, then we don't care about this lane in this fold, 7087// we'll special-handle it. 7088 NeedToApplyOffset |=
A != 0;
7091// Q = floor((2 * A) / (2^K)) 7095"We are expecting that A is always less than all-ones for SVT");
7097"We are expecting that K is always less than all-ones for ShSVT");
7099// If D was a power of two, apply the alternate constant derivation. 7107// If the divisor is 1 the result can be constant-folded. Likewise, we 7108// don't care about INT_MIN lanes, those can be set to undef if appropriate. 7110// Set P, A and K to a bogus values so we can try to splat them. 7115// x ?% 1 == 0 <--> true <--> x u<= -1 7123/*implicitTrunc=*/true),
7132// Collect the values from each element. 7136// If this is a srem by a one, avoid the fold since it can be constant-folded. 7137if (AllDivisorsAreOnes)
7140// If this is a srem by a powers-of-two (including INT_MIN), avoid the fold 7141// since it can be best implemented as a bit test. 7142if (AllDivisorsArePowerOfTwo)
7145SDValue PVal, AVal, KVal, QVal;
7148// Try to turn PAmts into a splat, since we don't care about the values 7149// that are currently '0'. If we can't, just keep '0'`s. 7151// Try to turn AAmts into a splat, since we don't care about the 7152// values that are currently '-1'. If we can't, change them to '0'`s. 7155// Try to turn KAmts into a splat, since we don't care about the values 7156// that are currently '-1'. If we can't, change them to '0'`s. 7167 QAmts.
size() == 1 &&
7168"Expected matchUnaryPredicate to return one element for scalable " 7175assert(isa<ConstantSDNode>(
D) &&
"Expected a constant");
7186if (NeedToApplyOffset) {
7187// We need ADD to do this. 7191// (add (mul N, P), A) 7196// Rotate right only if any divisor was even. We avoid rotates for all-odd 7197// divisors as a performance improvement, since rotating by 0 is a no-op. 7198if (HadEvenDivisor) {
7199// We need ROTR to do this. 7202// SREM: (rotr (add (mul N, P), A), K) 7207// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) 7212// If we didn't have lanes with INT_MIN divisor, then we're done. 7213if (!HadIntMinDivisor)
7216// That fold is only valid for positive divisors. Which effectively means, 7217// it is invalid for INT_MIN divisors. So if we have such a lane, 7218// we must fix-up results for said lanes. 7221// NOTE: we avoid letting illegal types through even if we're before legalize 7222// ops – legalization has a hard time producing good code for the code that 7239// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. 7243// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 7249// To produce final result we need to blend 2 vectors: 'SetCC' and 7250// 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick 7251// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is 7252// constant-folded, select can get lowered to a shuffle with constant mask. 7254 MaskedIsZero, Fold);
7261if (!isa<ConstantSDNode>(
Op.getOperand(0))) {
7263"be a constant integer");
7273EVT VT =
Op.getValueType();
7277// This is specifically a check for the handling of denormal inputs, not the 7285// Testing it with denormal inputs to avoid wrong estimate. 7287// Test = fabs(X) < SmallestNormal 7296bool LegalOps,
bool OptForSize,
7298unsignedDepth)
const{
7299// fneg is removable even if it has multiple uses. 7300if (
Op.getOpcode() ==
ISD::FNEG ||
Op.getOpcode() == ISD::VP_FNEG) {
7302returnOp.getOperand(0);
7305// Don't recurse exponentially. 7309// Pre-increment recursion depth for use in recursive calls. 7313EVT VT =
Op.getValueType();
7314unsigned Opcode =
Op.getOpcode();
7316// Don't allow anything with multiple uses unless we know it is free. 7324auto RemoveDeadNode = [&](
SDValueN) {
7325if (
N &&
N.getNode()->use_empty())
7331// Because getNegatedExpression can delete nodes we need a handle to keep 7332// temporary nodes alive in case the recursion manages to create an identical 7334 std::list<HandleSDNode> Handles;
7338// Don't invert constant FP values after legalization unless the target says 7339// the negated constant is legal. 7345if (LegalOps && !IsOpLegal)
7348APFloat V = cast<ConstantFPSDNode>(
Op)->getValueAPF();
7352// If we already have the use of the negated floating constant, it is free 7353// to negate it even it has multiple uses. 7360// Only permit BUILD_VECTOR of constants. 7362 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7375if (LegalOps && !IsOpLegal)
7384APFloat V = cast<ConstantFPSDNode>(
C)->getValueAPF();
7392if (!
Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7395// After operation legalization, it might not be legal to create new FSUBs. 7400// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y) 7403 getNegatedExpression(
X, DAG, LegalOps, OptForSize, CostX,
Depth);
7404// Prevent this node from being deleted by the next call. 7406 Handles.emplace_back(NegX);
7408// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) 7411 getNegatedExpression(
Y, DAG, LegalOps, OptForSize, CostY,
Depth);
7413// We're done with the handles. 7416// Negate the X if its cost is less or equal than Y. 7417if (NegX && (CostX <= CostY)) {
7421 RemoveDeadNode(NegY);
7425// Negate the Y if it is not expensive. 7430 RemoveDeadNode(NegX);
7436// We can't turn -(A-B) into B-A when we honor signed zeros. 7437if (!
Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7441// fold (fneg (fsub 0, Y)) -> Y 7448// fold (fneg (fsub X, Y)) -> (fsub Y, X) 7456// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 7459 getNegatedExpression(
X, DAG, LegalOps, OptForSize, CostX,
Depth);
7460// Prevent this node from being deleted by the next call. 7462 Handles.emplace_back(NegX);
7464// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 7467 getNegatedExpression(
Y, DAG, LegalOps, OptForSize, CostY,
Depth);
7469// We're done with the handles. 7472// Negate the X if its cost is less or equal than Y. 7473if (NegX && (CostX <= CostY)) {
7477 RemoveDeadNode(NegY);
7481// Ignore X * 2.0 because that is expected to be canonicalized to X + X. 7483if (
C->isExactlyValue(2.0) &&
Op.getOpcode() ==
ISD::FMUL)
7486// Negate the Y if it is not expensive. 7491 RemoveDeadNode(NegX);
7498if (!
Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7504 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ,
Depth);
7505// Give up if fail to negate the Z. 7509// Prevent this node from being deleted by the next two calls. 7510 Handles.emplace_back(NegZ);
7512// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) 7515 getNegatedExpression(
X, DAG, LegalOps, OptForSize, CostX,
Depth);
7516// Prevent this node from being deleted by the next call. 7518 Handles.emplace_back(NegX);
7520// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) 7523 getNegatedExpression(
Y, DAG, LegalOps, OptForSize, CostY,
Depth);
7525// We're done with the handles. 7528// Negate the X if its cost is less or equal than Y. 7529if (NegX && (CostX <= CostY)) {
7530Cost = std::min(CostX, CostZ);
7533 RemoveDeadNode(NegY);
7537// Negate the Y if it is not expensive. 7539Cost = std::min(CostY, CostZ);
7542 RemoveDeadNode(NegX);
7550if (
SDValue NegV = getNegatedExpression(
Op.getOperand(0), DAG, LegalOps,
7555if (
SDValue NegV = getNegatedExpression(
Op.getOperand(0), DAG, LegalOps,
7561// fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS)) 7562// iff at least one cost is cheaper and the other is neutral/cheaper 7566 getNegatedExpression(
LHS, DAG, LegalOps, OptForSize, CostLHS,
Depth);
7568 RemoveDeadNode(NegLHS);
7572// Prevent this node from being deleted by the next call. 7573 Handles.emplace_back(NegLHS);
7578 getNegatedExpression(
RHS, DAG, LegalOps, OptForSize, CostRHS,
Depth);
7580// We're done with the handles. 7586 RemoveDeadNode(NegLHS);
7587 RemoveDeadNode(NegRHS);
7591Cost = std::min(CostLHS, CostRHS);
7592return DAG.
getSelect(
DL, VT,
Op.getOperand(0), NegLHS, NegRHS);
7599//===----------------------------------------------------------------------===// 7600// Legalization Utilities 7601//===----------------------------------------------------------------------===// 7621if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7627// LL, LH, RL, and RH must be either all NULL or all set to a value. 7634if ((
Signed && HasSMUL_LOHI) || (!
Signed && HasUMUL_LOHI)) {
7661// The inputs are both zero-extended. 7662if (MakeMUL_LOHI(LL, RL,
Lo,
Hi,
false)) {
7663 Result.push_back(
Lo);
7664 Result.push_back(
Hi);
7667 Result.push_back(Zero);
7668 Result.push_back(Zero);
7677// The input values are both sign-extended. 7678// TODO non-MUL case? 7679if (MakeMUL_LOHI(LL, RL,
Lo,
Hi,
true)) {
7680 Result.push_back(
Lo);
7681 Result.push_back(
Hi);
7686unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7701if (!MakeMUL_LOHI(LL, RL,
Lo,
Hi,
false))
7704 Result.push_back(
Lo);
7711 Result.push_back(
Hi);
7715// Compute the full width result. 7724if (!MakeMUL_LOHI(LL, RH,
Lo,
Hi,
false))
7727// This is effectively the add part of a multiply-add of half-sized operands, 7728// so it cannot overflow. 7731if (!MakeMUL_LOHI(LH, RL,
Lo,
Hi,
false))
7783bool Ok = expandMUL_LOHI(
N->getOpcode(),
N->getValueType(0),
SDLoc(
N),
7784N->getOperand(0),
N->getOperand(1), Result, HiLoVT,
7785 DAG, Kind, LL, LH, RL, RH);
7787assert(Result.size() == 2);
7794// Optimize unsigned division or remainder by constants for types twice as large 7797// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder 7800// Sum += __builtin_uadd_overflow(Lo, High, &Sum); 7801// Remainder = Sum % Constant 7802// This is based on "Remainder by Summing Digits" from Hacker's Delight. 7804// For division, we can compute the remainder using the algorithm described 7805// above, subtract it from the dividend to get an exact multiple of Constant. 7806// Then multiply that exact multiply by the multiplicative inverse modulo 7807// (1 << (BitWidth / 2)) to get the quotient. 7809// If Constant is even, we can shift right the dividend and the divisor by the 7810// number of trailing zeros in Constant before applying the remainder algorithm. 7811// If we're after the quotient, we can subtract this value from the shifted 7812// dividend and multiply by the multiplicative inverse of the shifted divisor. 7813// If we want the remainder, we shift the value left by the number of trailing 7814// zeros and add the bits that were shifted out of the dividend. 7819unsigned Opcode =
N->getOpcode();
7820EVT VT =
N->getValueType(0);
7822// TODO: Support signed division/remainder. 7827"Unexpected opcode");
7829auto *CN = dyn_cast<ConstantSDNode>(
N->getOperand(1));
7833APInt Divisor = CN->getAPIntValue();
7839// Divisor needs to less than (1 << HBitWidth). 7841if (Divisor.
uge(HalfMaxPlus1))
7844// We depend on the UREM by constant optimization in DAGCombiner that requires 7850// Don't expand if optimizing for size. 7854// Early out for 0 or 1 divisors. 7858// If the divisor is even, shift it until it becomes odd. 7859unsigned TrailingZeros = 0;
7869// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and 7870// then add in the carry. 7871// TODO: If we can't split it in half, we might be able to split into 3 or 7872// more pieces using a smaller bit width. 7873if (HalfMaxPlus1.
urem(Divisor).
isOne()) {
7874assert(!LL == !LH &&
"Expected both input halves or no input halves!");
7876 std::tie(LL, LH) = DAG.
SplitScalar(
N->getOperand(0), dl, HiLoVT, HiLoVT);
7878// Shift the input by the number of TrailingZeros in the divisor. The 7879// shifted out bits will be added to the remainder later. 7881// Save the shifted off bits if we need the remainder. 7899// Use uaddo_carry if we can, otherwise use a compare to detect overflow. 7910// If the boolean for the target is 0 or 1, we can add the setcc result 7922// If we didn't find a sum, we can't do the expansion. 7926// Perform a HiLoVT urem on the Sum using truncated divisor. 7933// Subtract the remainder from the shifted dividend. 7939// Multiply by the multiplicative inverse of the divisor modulo 7946// Split the quotient into low and high parts. 7948 std::tie(QuotL, QuotH) = DAG.
SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7949 Result.push_back(QuotL);
7950 Result.push_back(QuotH);
7954// If we shifted the input, shift the remainder left and add the bits we 7955// shifted off the input. 7962 Result.push_back(RemL);
7969// Check that (every element of) Z is undef or not an exact multiple of BW. 7988bool IsFSHL =
Node->getOpcode() == ISD::VP_FSHL;
7991EVT ShVT = Z.getValueType();
7993// fshl: X << C | Y >> (BW - C) 7994// fshr: X << (BW - C) | Y >> C 7995// where C = Z % BW is not zero 7997 ShAmt = DAG.
getNode(ISD::VP_UREM,
DL, ShVT, Z, BitWidthC, Mask, VL);
7998 InvShAmt = DAG.
getNode(ISD::VP_SUB,
DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7999 ShX = DAG.
getNode(ISD::VP_SHL,
DL, VT,
X, IsFSHL ? ShAmt : InvShAmt, Mask,
8001 ShY = DAG.
getNode(ISD::VP_SRL,
DL, VT,
Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8004// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) 8005// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) 8008// Z % BW -> Z & (BW - 1) 8009 ShAmt = DAG.
getNode(ISD::VP_AND,
DL, ShVT, Z, BitMask, Mask, VL);
8010// (BW - 1) - (Z % BW) -> ~Z & (BW - 1) 8013 InvShAmt = DAG.
getNode(ISD::VP_AND,
DL, ShVT, NotZ, BitMask, Mask, VL);
8016 ShAmt = DAG.
getNode(ISD::VP_UREM,
DL, ShVT, Z, BitWidthC, Mask, VL);
8017 InvShAmt = DAG.
getNode(ISD::VP_SUB,
DL, ShVT, BitMask, ShAmt, Mask, VL);
8022 ShX = DAG.
getNode(ISD::VP_SHL,
DL, VT,
X, ShAmt, Mask, VL);
8024 ShY = DAG.
getNode(ISD::VP_SRL,
DL, VT, ShY1, InvShAmt, Mask, VL);
8027 ShX = DAG.
getNode(ISD::VP_SHL,
DL, VT, ShX1, InvShAmt, Mask, VL);
8028 ShY = DAG.
getNode(ISD::VP_SRL,
DL, VT,
Y, ShAmt, Mask, VL);
8031return DAG.
getNode(ISD::VP_OR,
DL, VT, ShX, ShY, Mask, VL);
8036if (Node->isVPOpcode())
8039EVT VT = Node->getValueType(0);
8049SDValue Z = Node->getOperand(2);
8052bool IsFSHL = Node->getOpcode() ==
ISD::FSHL;
8055EVT ShVT = Z.getValueType();
8057// If a funnel shift in the other direction is more supported, use it. 8062// fshl X, Y, Z -> fshr X, Y, -Z 8063// fshr X, Y, Z -> fshl X, Y, -Z 8067// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z 8068// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z 8085// fshl: X << C | Y >> (BW - C) 8086// fshr: X << (BW - C) | Y >> C 8087// where C = Z % BW is not zero 8094// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) 8095// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) 8098// Z % BW -> Z & (BW - 1) 8100// (BW - 1) - (Z % BW) -> ~Z & (BW - 1) 8122// TODO: Merge with expandFunnelShift. 8125EVT VT = Node->getValueType(0);
8127bool IsLeft = Node->getOpcode() ==
ISD::ROTL;
8128SDValue Op0 = Node->getOperand(0);
8129SDValue Op1 = Node->getOperand(1);
8135// If a rotate in the other direction is more supported, use it. 8140return DAG.
getNode(RevRot,
DL, VT, Op0, Sub);
8143if (!AllowVectorOps && VT.
isVector() &&
8157// (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1)) 8158// (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1)) 8161 ShVal = DAG.
getNode(ShOpc,
DL, VT, Op0, ShAmt);
8163 HsVal = DAG.
getNode(HsOpc,
DL, VT, Op0, HsAmt);
8165// (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w)) 8166// (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w)) 8169 ShVal = DAG.
getNode(ShOpc,
DL, VT, Op0, ShAmt);
8180assert(Node->getNumOperands() == 3 &&
"Not a double-shift!");
8181EVT VT = Node->getValueType(0);
8187SDValue ShOpLo = Node->getOperand(0);
8188SDValue ShOpHi = Node->getOperand(1);
8189SDValue ShAmt = Node->getOperand(2);
8195// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and 8196// ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized 8213// If the shift amount is larger or equal than the width of a part we don't 8214// use the result from the FSHL/FSHR. Insert a test and select the appropriate 8215// values for large shift amounts. 8232unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8233SDValue Src = Node->getOperand(OpNo);
8234EVT SrcVT = Src.getValueType();
8235EVT DstVT = Node->getValueType(0);
8238// FIXME: Only f32 to i64 conversions are supported. 8239if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8242if (Node->isStrictFPOpcode())
8243// When a NaN is converted to an integer a trap is allowed. We can't 8244// use this expansion here because it would eliminate that trap. Other 8245// traps are also allowed and cannot be eliminated. See 8246// IEEE 754-2008 sec 5.8. 8249// Expand f32 -> i64 conversion 8250// This algorithm comes from compiler-rt's implementation of fixsfdi: 8251// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c 8305unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8306SDValue Src = Node->getOperand(OpNo);
8308EVT SrcVT = Src.getValueType();
8309EVT DstVT = Node->getValueType(0);
8315// Only expand vector types if we have the appropriate vector bit operations. 8322// If the maximum float value is smaller then the signed integer range, 8323// the destination signmask can't be represented by the float, so we can 8324// just use FP_TO_SINT directly. 8330if (Node->isStrictFPOpcode()) {
8332 { Node->getOperand(0), Src });
8333 Chain = Result.getValue(1);
8339// Don't expand it if there isn't cheap fsub instruction. 8347if (Node->isStrictFPOpcode()) {
8349 Node->getOperand(0),
/*IsSignaling*/true);
8355bool Strict = Node->isStrictFPOpcode() ||
8359// Expand based on maximum range of FP_TO_SINT, if the value exceeds the 8360// signmask then offset (the result of which should be fully representable). 8361// Sel = Src < 0x8000000000000000 8362// FltOfs = select Sel, 0, 0x8000000000000000 8363// IntOfs = select Sel, 0, 0x8000000000000000 8364// Result = fp_to_sint(Src - FltOfs) ^ IntOfs 8366// TODO: Should any fast-math-flags be set for the FSUB? 8374if (Node->isStrictFPOpcode()) {
8376 { Chain, Src, FltOfs });
8386// Expand based on maximum range of FP_TO_SINT: 8387// True = fp_to_sint(Src) 8388// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) 8389// Result = select (Src < 0x8000000000000000), True, False 8392// TODO: Should any fast-math-flags be set for the FSUB? 8398 Result = DAG.
getSelect(dl, DstVT, Sel, True, False);
8405// This transform is not correct for converting 0 when rounding mode is set 8406// to round toward negative infinity which will produce -0.0. So disable 8408if (Node->isStrictFPOpcode())
8411SDValue Src = Node->getOperand(0);
8412EVT SrcVT = Src.getValueType();
8413EVT DstVT = Node->getValueType(0);
8415// If the input is known to be non-negative and SINT_TO_FP is legal then use 8417if (Node->getFlags().hasNonNeg() &&
8427// Only expand vector types if we have the appropriate vector bit 8438// Implementation of unsigned i64 to f64 following the algorithm in 8439// __floatundidf in compiler_rt. This implementation performs rounding 8440// correctly in all rounding modes with the exception of converting 0 8441// when rounding toward negative infinity. In that case the fsub will 8442// produce -0.0. This will be added to +0.0 and produce -0.0 which is 8446 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8465unsigned Opcode = Node->getOpcode();
8470if (Node->getFlags().hasNoNaNs()) {
8472EVT VT = Node->getValueType(0);
8477SDValue Op1 = Node->getOperand(0);
8478SDValue Op2 = Node->getOperand(1);
8480// Copy FMF flags, but always set the no-signed-zeros flag 8481// as this is implied by the FMINNUM/FMAXNUM semantics. 8491if (
SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8494EVT VT = Node->getValueType(0);
8497"Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8504SDValue Quiet0 = Node->getOperand(0);
8505SDValue Quiet1 = Node->getOperand(1);
8507if (!Node->getFlags().hasNoNaNs()) {
8508// Insert canonicalizes if it's possible we need to quiet to get correct 8520return DAG.
getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8523// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that 8524// instead if there are no NaNs and there can't be an incompatible zero 8525// compare: at least one operand isn't +/-0, or there are no signed-zeros. 8526if ((Node->getFlags().hasNoNaNs() ||
8529 (Node->getFlags().hasNoSignedZeros() ||
8532unsigned IEEE2018Op =
8535return DAG.
getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8536 Node->getOperand(1), Node->getFlags());
8539if (
SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8547if (
SDValue Expanded = expandVectorNaryOpBySplitting(
N, DAG))
8553unsigned Opc =
N->getOpcode();
8554EVT VT =
N->getValueType(0);
8559// First, implement comparison not propagating NaN. If no native fmin or fmax 8560// available, use plain select with setcc instead. 8565// FIXME: We should probably define fminnum/fmaxnum variants with correct 8566// signed zero behavior. 8567bool MinMaxMustRespectOrderedZero =
false;
8571 MinMaxMustRespectOrderedZero =
true;
8578// NaN (if exists) will be propagated later, so orderness doesn't matter. 8584// Propagate any NaN of both operands 8585if (!
N->getFlags().hasNoNaNs() &&
8593// fminimum/fmaximum requires -0.0 less than +0.0 8594if (!MinMaxMustRespectOrderedZero && !
N->getFlags().hasNoSignedZeros() &&
8617unsigned Opc = Node->getOpcode();
8618EVT VT = Node->getValueType(0);
8628if (!Flags.hasNoNaNs()) {
8629// Insert canonicalizes if it's possible we need to quiet to get correct 8642// We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has 8643// same behaviors for all of other cases: +0.0 vs -0.0 included. 8644if (Flags.hasNoNaNs() ||
8646unsigned IEEE2019Op =
8652// FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return 8653// either one for +0.0 vs -0.0. 8654if ((Flags.hasNoNaNs() ||
8666// If only one operand is NaN, override it with another operand. 8676// If MinMax is NaN, let's quiet it. 8682// Fixup signed zero behavior. 8683if (
Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8700/// Returns a true value if if this FPClassTest can be performed with an ordered 8701/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns 8702/// std::nullopt if it cannot be performed as a compare with 0. 8708bool IsOrdered = NanTest ==
fcNone;
8709bool IsUnordered = NanTest ==
fcNan;
8711// Skip cases that are testing for only a qnan or snan. 8712if (!IsOrdered && !IsUnordered)
8715if (OrderedMask ==
fcZero &&
8728EVT OperandVT =
Op.getValueType();
8732// Degenerated cases. 8738// PPC double double is a pair of doubles, of which the higher part determines 8740if (OperandVT == MVT::ppcf128) {
8743 OperandVT = MVT::f64;
8746// Floating-point type properties. 8750bool IsF80 = (ScalarFloatVT == MVT::f80);
8752// Some checks can be implemented using float comparisons, if floating point 8753// exceptions are ignored. 8754if (Flags.hasNoFPExcept() &&
8757bool IsInvertedFP =
false;
8761 FPTestMask = InvertedFPCheck;
8768// See if we can fold an | fcNan into an unordered compare. 8769FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8771// Can't fold the ordered check if we're only testing for snan or qnan 8774 OrderedFPTestMask = FPTestMask;
8776constbool IsOrdered = FPTestMask == OrderedFPTestMask;
8778if (std::optional<bool> IsCmp0 =
8781 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8784// If denormals could be implicitly treated as 0, this is not equivalent 8785// to a compare with 0 since it will also be true for denormals. 8788 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8791if (FPTestMask ==
fcNan &&
8797bool IsOrderedInf = FPTestMask ==
fcInf;
8800 : UnorderedCmpOpcode,
8806// isinf(x) --> fabs(x) == inf 8811 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8816 : UnorderedCmpOpcode,
8818// isposinf(x) --> x == inf 8819// isneginf(x) --> x == -inf 8820// isposinf(x) || nan --> x u== inf 8821// isneginf(x) || nan --> x u== -inf 8827 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8831// TODO: Could handle ordered case, but it produces worse code for 8832// x86. Maybe handle ordered if fabs is free? 8839// (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal 8841// TODO: Maybe only makes sense if fabs is free. Integer test of 8842// exponent bits seems better for x86. 8846return DAG.
getSetCC(
DL, ResultVT, Abs, SmallestNormal,
8847 IsOrdered ? OrderedOp : UnorderedOp);
8852// TODO: Handle unordered 8861// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal) 8870 DAG.
getSetCC(
DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
8872return DAG.
getNode(LogicOp,
DL, ResultVT, IsFinite, IsNormal);
8877// Some checks may be represented as inversion of simpler check, for example 8878// "inf|normal|subnormal|zero" => !"nan". 8879bool IsInverted =
false;
8882Test = InvertedCheck;
8886// In the general case use integer operations. 8898constunsigned ExplicitIntBitInF80 = 63;
8901 ExpMask.
clearBit(ExplicitIntBitInF80);
8915constauto appendResult = [&](
SDValue PartialRes) {
8924SDValue IntBitIsSetV;
// Explicit integer bit in f80 mantissa is set. 8925constauto getIntBitIsSet = [&]() ->
SDValue {
8927APInt IntBitMask(BitSize, 0);
8928 IntBitMask.
setBit(ExplicitIntBitInF80);
8936// Split the value into sign bit and absolute value. 8941// Tests that involve more than one class should be processed first. 8945 ;
// Detect finite numbers of f80 by checking individual classes because 8946// they have different settings of the explicit integer bit. 8948// finite(V) ==> abs(V) < exp_mask 8952// finite(V) && V > 0 ==> V < exp_mask 8954Test &= ~fcPosFinite;
8956// finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1 8959Test &= ~fcNegFinite;
8961 appendResult(PartialRes);
8964// fcZero | fcSubnormal => test all exponent bits are 0 8965// TODO: Handle sign bit specific cases 8970 appendResult(ExpIsZero);
8975// Check for individual classes. 8980elseif (PartialCheck ==
fcZero)
8982else// ISD::fcNegZero 8984 appendResult(PartialRes);
8988// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) 8989// issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) 8997 appendResult(PartialRes);
9000if (
unsigned PartialCheck =
Test &
fcInf) {
9003elseif (PartialCheck ==
fcInf)
9005else {
// ISD::fcNegInf 9010 appendResult(PartialRes);
9013if (
unsigned PartialCheck =
Test &
fcNan) {
9014APInt InfWithQnanBit = Inf | QNaNBitMask;
9016if (PartialCheck ==
fcNan) {
9017// isnan(V) ==> abs(V) > int(inf) 9020// Recognize unsupported values as NaNs for compatibility with glibc. 9021// In them (exp(V)==0) == int_bit. 9029 }
elseif (PartialCheck ==
fcQNan) {
9030// isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit) 9033 }
else {
// ISD::fcSNan 9034// issignaling(V) ==> abs(V) > unsigned(Inf) && 9035// abs(V) < (unsigned(Inf) | quiet_bit) 9041 appendResult(PartialRes);
9045// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1)) 9046APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
9049APInt ExpLimit = ExpMask - ExpLSB;
9062 appendResult(PartialRes);
9072// Only expand vector types if we have the appropriate vector bit operations. 9085EVT VT = Node->getValueType(0);
9091// TODO: Add support for irregular type lengths. 9092if (!(Len <= 128 && Len % 8 == 0))
9095// Only expand vector types if we have the appropriate vector bit operations. 9099// This is the "best" algorithm from 9100// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel 9108// v = v - ((v >> 1) & 0x55555555...) 9114// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) 9120// v = (v + (v >> 4)) & 0x0F0F0F0F... 9130// Avoid the multiply if we only have 2 bytes to add. 9131// TODO: Only doing this for scalars because vectors weren't as obviously 9134// v = (v + (v >> 8)) & 0x00FF; 9142// v = (v * 0x01010101...) >> (Len - 8) 9151for (
unsigned Shift = 8; Shift < Len; Shift *= 2) {
9162EVT VT = Node->getValueType(0);
9165SDValue Mask = Node->getOperand(1);
9166SDValue VL = Node->getOperand(2);
9170// TODO: Add support for irregular type lengths. 9171if (!(Len <= 128 && Len % 8 == 0))
9174// This is same algorithm of expandCTPOP from 9175// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel 9183SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9185// v = v - ((v >> 1) & 0x55555555...) 9186 Tmp1 = DAG.
getNode(ISD::VP_AND, dl, VT,
9190Op = DAG.
getNode(ISD::VP_SUB, dl, VT,
Op, Tmp1, Mask, VL);
9192// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) 9193 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op, Mask33, Mask, VL);
9194 Tmp3 = DAG.
getNode(ISD::VP_AND, dl, VT,
9198Op = DAG.
getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9200// v = (v + (v >> 4)) & 0x0F0F0F0F... 9203 Tmp5 = DAG.
getNode(ISD::VP_ADD, dl, VT,
Op, Tmp4, Mask, VL);
9204Op = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9209// v = (v * 0x01010101...) >> (Len - 8) 9215 V = DAG.
getNode(ISD::VP_MUL, dl, VT,
Op, Mask01, Mask, VL);
9218for (
unsigned Shift = 8; Shift < Len; Shift *= 2) {
9220 V = DAG.
getNode(ISD::VP_ADD, dl, VT, V,
9221 DAG.
getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9231EVT VT = Node->getValueType(0);
9236// If the non-ZERO_UNDEF version is supported we can use that instead. 9241// If the ZERO_UNDEF version is supported use that and handle the zero case. 9252// Only expand vector types if we have the appropriate vector bit operations. 9253// This includes the operations needed to expand CTPOP if it isn't supported. 9261// for now, we do this: 9266// x = x | (x >>32); // for 64-bit input 9267// return popcount(~x); 9269// Ref: "Hacker's Delight" by Henry Warren 9270for (
unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9281EVT VT = Node->getValueType(0);
9284SDValue Mask = Node->getOperand(1);
9285SDValue VL = Node->getOperand(2);
9293// x = x | (x >>32); // for 64-bit input 9294// return popcount(~x); 9295for (
unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9298 DAG.
getNode(ISD::VP_SRL, dl, VT,
Op, Tmp, Mask, VL), Mask,
9303return DAG.
getNode(ISD::VP_CTPOP, dl, VT,
Op, Mask, VL);
9312 :
APInt(64, 0x0218A392CD3D5DBFULL);
9326for (
unsigned i = 0; i <
BitWidth; i++) {
9332// Create a ConstantArray in Constant Pool 9352EVT VT = Node->getValueType(0);
9356// If the non-ZERO_UNDEF version is supported we can use that instead. 9361// If the ZERO_UNDEF version is supported use that and handle the zero case. 9372// Only expand vector types if we have the appropriate vector bit operations. 9373// This includes the operations needed to expand CTPOP if it isn't supported. 9383// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal. 9386if (
SDValue V = CTTZTableLookup(Node, DAG, dl, VT,
Op, NumBitsPerElt))
9389// for now, we use: { return popcount(~x & (x - 1)); } 9390// unless the target has ctlz but not ctpop, in which case we use: 9391// { return 32 - nlz(~x & (x-1)); } 9392// Ref: "Hacker's Delight" by Henry Warren 9397// If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 9408SDValue Mask = Node->getOperand(1);
9409SDValue VL = Node->getOperand(2);
9411EVT VT = Node->getValueType(0);
9413// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1)) 9418SDValue Tmp = DAG.
getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9419return DAG.
getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9424// %cond = to_bool_vec %source 9425// %splat = splat /*val=*/VL 9427// %v = vp.select %cond, /*true=*/tz, /*false=*/%splat 9428// %r = vp.reduce.umin %v 9433EVT SrcVT = Source.getValueType();
9434EVT ResVT =
N->getValueType(0);
9438// Convert to boolean vector. 9443 Source = DAG.
getNode(ISD::VP_SETCC,
DL, SrcVT, Source, AllZero,
9451 DAG.
getNode(ISD::VP_SELECT,
DL, ResVecVT, Source, StepVec,
Splat, EVL);
9452return DAG.
getNode(ISD::VP_REDUCE_UMIN,
DL, ResVT, ExtEVL,
Select, Mask, EVL);
9459EVT MaskVT = Mask.getValueType();
9462// Find a suitable type for a stepvector. 9463ConstantRange VScaleRange(1,
/*isFullSet=*/true);
// Fixed length default. 9469/*ZeroIsPoison=*/true, &VScaleRange);
9473// If promotion is required to make the type legal, do it here; promotion 9474// of integers within LegalizeVectorOps is looking for types of the same 9475// size but with a smaller number of larger elements, not the usual larger 9476// size with the same number of larger elements. 9483// Zero out lanes with inactive elements, then find the highest remaining 9484// value from the stepvector. 9493bool IsNegative)
const{
9495EVT VT =
N->getValueType(0);
9498// abs(x) -> smax(x,sub(0,x)) 9507// abs(x) -> umin(x,sub(0,x)) 9516// 0 - abs(x) -> smin(x, sub(0,x)) 9525// Only expand vector types if we have the appropriate vector operations. 9539// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) 9543// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) 9549EVT VT =
N->getValueType(0);
9554// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) 9555// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs)) 9564// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs)) 9570// If the subtract doesn't overflow then just use abs(sub()) 9571// NOTE: don't use frozen operands for value tracking. 9589// Branchless expansion iff cmp result is allbits: 9590// abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs))) 9591// abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs))) 9598// Similar to the branchless expansion, use the (sign-extended) usubo overflow 9599// flag if the (scalar) type is illegal as this is more likely to legalize 9601// abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs)) 9610// FIXME: Should really try to split the vector in case it's legal on a 9615// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) 9616// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) 9623EVT VT =
N->getValueType(0);
9627unsigned Opc =
N->getOpcode();
9638// If the operands are already extended, we can add+shift. 9648return DAG.
getNode(ShiftOpc, dl, VT, Sum,
9652// For scalars, see if we can efficiently extend/truncate to use add+shift. 9663// Just use SRL as we will be truncating away the extended sign bits. 9670// avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1)) 9678// Right shift the sum by 1 9690// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1)) 9691// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1)) 9692// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1)) 9693// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1)) 9700return DAG.
getNode(SumOpc, dl, VT, Sign, Shift);
9705EVT VT =
N->getValueType(0);
9712SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9717// Use a rotate by 8. This can be further expanded if necessary. 9763EVT VT =
N->getValueType(0);
9772SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9781return DAG.
getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9791 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9795 Tmp4 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9796 Tmp2 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9797return DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9801 Tmp7 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op,
9805 Tmp6 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op,
9806 DAG.
getConstant(255ULL << 16, dl, VT), Mask, EVL);
9809 Tmp5 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op,
9810 DAG.
getConstant(255ULL << 24, dl, VT), Mask, EVL);
9815 Tmp4 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp4,
9816 DAG.
getConstant(255ULL << 24, dl, VT), Mask, EVL);
9819 Tmp3 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp3,
9820 DAG.
getConstant(255ULL << 16, dl, VT), Mask, EVL);
9823 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9827 Tmp8 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9828 Tmp6 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9829 Tmp4 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9830 Tmp2 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9831 Tmp8 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9832 Tmp4 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9833return DAG.
getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9839EVT VT =
N->getValueType(0);
9846// If we can, perform BSWAP first and then the mask+swap the i4, then i2 9847// and finally the i1 pairs. 9848// TODO: We can easily support i4/i2 legal types if any target ever does. 9850// Create the masks - repeating the pattern every byte. 9855// BSWAP if the type is wider than a single byte. 9858// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) 9865// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) 9872// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) 9882for (
unsignedI = 0, J = Sz-1;
I < Sz; ++
I, --J) {
9899assert(
N->getOpcode() == ISD::VP_BITREVERSE);
9902EVT VT =
N->getValueType(0);
9911// If we can, perform BSWAP first and then the mask+swap the i4, then i2 9912// and finally the i1 pairs. 9913// TODO: We can easily support i4/i2 legal types if any target ever does. 9915// Create the masks - repeating the pattern every byte. 9920// BSWAP if the type is wider than a single byte. 9921 Tmp = (Sz > 8 ? DAG.
getNode(ISD::VP_BSWAP, dl, VT,
Op, Mask, EVL) :
Op);
9923// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) 9926 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9932 Tmp = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9934// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) 9937 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9943 Tmp = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9945// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) 9948 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9954 Tmp = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9960std::pair<SDValue, SDValue>
9964SDValue Chain = LD->getChain();
9965SDValue BasePTR = LD->getBasePtr();
9966EVT SrcVT = LD->getMemoryVT();
9967EVT DstVT = LD->getValueType(0);
9978// A vector must always be stored in memory as-is, i.e. without any padding 9979// between the elements, since various code depend on it, e.g. in the 9980// handling of a bitcast of a vector type to int, which may be done with a 9981// vector store followed by an integer load. A vector that does not have 9982// elements that are byte-sized must therefore be stored as an integer 9983// built out of the extracted vector elements. 9995// Load the whole vector and avoid masking off the top bits as it makes 9996// the codegen worse. 9999 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
10000 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10003for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10004unsigned ShiftIntoIdx =
10015 Scalar = DAG.
getNode(ExtendOp, SL, DstEltVT, Scalar);
10022return std::make_pair(
Value, Load.getValue(1));
10031for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10033 DAG.
getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
10034 LD->getPointerInfo().getWithOffset(
Idx * Stride),
10035 SrcEltVT, LD->getOriginalAlign(),
10036 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10047return std::make_pair(
Value, NewChain);
10054SDValue Chain = ST->getChain();
10055SDValue BasePtr = ST->getBasePtr();
10057EVT StVT = ST->getMemoryVT();
10062// The type of the data we want to save 10066// The type of data as saved in memory. 10071// A vector must always be stored in memory as-is, i.e. without any padding 10072// between the elements, since various code depend on it, e.g. in the 10073// handling of a bitcast of a vector type to int, which may be done with a 10074// vector store followed by an integer load. A vector that does not have 10075// elements that are byte-sized must therefore be stored as an integer 10076// built out of the extracted vector elements. 10083for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10088unsigned ShiftIntoIdx =
10097return DAG.
getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10098 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10102// Store Stride in bytes 10104assert(Stride &&
"Zero stride!");
10105// Extract each of the elements from the original vector and save them into 10106// memory individually. 10108for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10115// This scalar TruncStore may be illegal, but we legalize it later. 10117 Chain, SL, Elt,
Ptr, ST->getPointerInfo().getWithOffset(
Idx * Stride),
10118 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10127std::pair<SDValue, SDValue>
10130"unaligned indexed loads not implemented!");
10131SDValue Chain = LD->getChain();
10133EVT VT = LD->getValueType(0);
10134EVT LoadedVT = LD->getMemoryVT();
10143// Scalarize the load and let the individual components be handled. 10144return scalarizeVectorLoad(LD, DAG);
10147// Expand to a (misaligned) integer load of the same size, 10148// then bitconvert to floating point or vector. 10150 LD->getMemOperand());
10156return std::make_pair(Result, newLoad.
getValue(1));
10159// Copy the value to a (aligned) stack slot using (unaligned) integer 10160// loads and stores, then do a (aligned) load from the stack slot. 10164unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10166// Make sure the stack slot is also aligned for the register type. 10168auto FrameIndex = cast<FrameIndexSDNode>(StackBase.
getNode())->getIndex();
10173EVT PtrVT =
Ptr.getValueType();
10174EVT StackPtrVT = StackPtr.getValueType();
10179// Do all but one copies using the full register width. 10180for (
unsigned i = 1; i < NumRegs; i++) {
10181// Load one integer register's worth from the original location. 10183 RegVT, dl, Chain,
Ptr, LD->getPointerInfo().getWithOffset(
Offset),
10184 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10186// Follow the load with a store to the stack slot. Remember the store. 10188 Load.getValue(1), dl, Load, StackPtr,
10190// Increment the pointers. 10197// The last copy may be partial. Do an extending load. 10199 8 * (LoadedBytes -
Offset));
10202 LD->getPointerInfo().getWithOffset(
Offset), MemVT,
10203 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10205// Follow the load with a store to the stack slot. Remember the store. 10206// On big-endian machines this requires a truncating store to ensure 10207// that the bits end up in the right place. 10209 Load.getValue(1), dl, Load, StackPtr,
10212// The order of the stores doesn't matter - say it with a TokenFactor. 10215// Finally, perform the original load only redirected to the stack slot. 10216 Load = DAG.
getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10220// Callers expect a MERGE_VALUES node. 10221return std::make_pair(Load, TF);
10225"Unaligned load of unsupported type.");
10227// Compute the new VT that is half the size of the old one. This is an 10234Align Alignment = LD->getOriginalAlign();
10235unsigned IncrementSize = NumBits / 8;
10238// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. 10242// Load the value in two parts 10246 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10251 LD->getPointerInfo().getWithOffset(IncrementSize),
10252 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10255Hi = DAG.
getExtLoad(HiExtType, dl, VT, Chain,
Ptr, LD->getPointerInfo(),
10256 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10261 LD->getPointerInfo().getWithOffset(IncrementSize),
10262 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10266// aggregate the two parts 10274return std::make_pair(Result, TF);
10280"unaligned indexed stores not implemented!");
10281SDValue Chain = ST->getChain();
10285Align Alignment = ST->getOriginalAlign();
10287EVT StoreMemVT = ST->getMemoryVT();
10295// Scalarize the store and let the individual components be handled. 10299// Expand to a bitconvert of the value to the integer type of the 10300// same size, then a (misaligned) int store. 10301// FIXME: Does not handle truncating floating point stores! 10303 Result = DAG.
getStore(Chain, dl, Result,
Ptr, ST->getPointerInfo(),
10304 Alignment, ST->getMemOperand()->getFlags());
10307// Do a (aligned) store to a stack slot, then copy from the stack slot 10308// to the final destination using (unaligned) integer loads and stores. 10312EVT PtrVT =
Ptr.getValueType();
10315unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10317// Make sure the stack slot is also aligned for the register type. 10319auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10321// Perform the original store, only redirected to the stack slot. 10323 Chain, dl, Val, StackPtr,
10326EVT StackPtrVT = StackPtr.getValueType();
10333// Do all but one copies using the full register width. 10334for (
unsigned i = 1; i < NumRegs; i++) {
10335// Load one integer register's worth from the stack slot. 10337 RegVT, dl, Store, StackPtr,
10339// Store it to the final location. Remember the store. 10341 ST->getPointerInfo().getWithOffset(
Offset),
10342 ST->getOriginalAlign(),
10343 ST->getMemOperand()->getFlags()));
10344// Increment the pointers. 10350// The last store may be partial. Do a truncating store. On big-endian 10351// machines this requires an extending load from the stack slot to ensure 10352// that the bits are in the right place. 10356// Load from the stack slot. 10363 ST->getPointerInfo().getWithOffset(
Offset), LoadMemVT,
10364 ST->getOriginalAlign(),
10365 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10366// The order of the stores doesn't matter - say it with a TokenFactor. 10372"Unaligned store of unknown type.");
10373// Get the half-size VT 10376unsigned IncrementSize = NumBits / 8;
10378// Divide the stored value in two parts. 10382// If Val is a constant, replace the upper bits with 0. The SRL will constant 10383// fold and not use the upper bits. A smaller constant may be easier to 10385if (
auto *
C = dyn_cast<ConstantSDNode>(
Lo);
C && !
C->isOpaque())
10392// Store the two parts 10396Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10397 ST->getMemOperand()->getFlags());
10402 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10403 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10414bool IsCompressedMemory)
const{
10416EVT AddrVT =
Addr.getValueType();
10417EVT MaskVT = Mask.getValueType();
10419"Incompatible types of Data and Mask");
10420if (IsCompressedMemory) {
10423"Cannot currently handle compressed memory with scalable vectors");
10424// Incrementing the pointer according to number of '1's in the mask. 10429 MaskIntVT = MVT::i32;
10432// Count '1's with POPCNT. 10435// Scale is an element size in bytes. 10453"Cannot index a scalable vector within a fixed-width vector");
10457EVT IdxVT =
Idx.getValueType();
10460// If this is a constant index and we know the value plus the number of the 10461// elements in the subvector minus one is less than the minimum number of 10462// elements then it's safe to return Idx. 10463if (
auto *IdxCst = dyn_cast<ConstantSDNode>(
Idx))
10464if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10478unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10486return getVectorSubVecPointer(
10487 DAG, VecPtr, VecVT,
10497// Make sure the index type is big enough to compute in. 10502// Calculate the element offset and add it to the pointer. 10505"Converting bits to bytes lost precision");
10507"Sub-vector must be a vector with matching element type");
10511EVT IdxVT = Index.getValueType();
10522//===----------------------------------------------------------------------===// 10523// Implementation of Emulated TLS Model 10524//===----------------------------------------------------------------------===// 10528// Access to address of TLS varialbe xyz is lowered to a function call: 10529// __emutls_get_address( address of global variable named "__emutls_v.xyz" ) 10543assert(EmuTlsVar &&
"Cannot find EmuTlsVar ");
10545 Entry.Ty = VoidPtrType;
10546 Args.push_back(Entry);
10553 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10555// TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 10556// At last for X86 targets, maybe good for other targets too? 10562"Emulated TLS must have zero offset in GlobalAddressSDNode");
10563return CallResult.first;
10574EVT VT =
Op.getOperand(0).getValueType();
10576if (VT.
bitsLT(MVT::i32)) {
10590SDValue Op0 = Node->getOperand(0);
10591SDValue Op1 = Node->getOperand(1);
10594unsigned Opcode = Node->getOpcode();
10597// umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits 10606// umin(x,y) -> sub(x,usubsat(x,y)) 10607// TODO: Missing freeze(Op0)? 10614// umax(x,y) -> add(x,usubsat(y,x)) 10615// TODO: Missing freeze(Op0)? 10622// FIXME: Should really try to split the vector in case it's legal on a 10627// Attempt to find an existing SETCC node that we can reuse. 10628// TODO: Do we need a generic doesSETCCNodeExist? 10629// TODO: Missing freeze(Op0)/freeze(Op1)? 10636 {Op0, Op1, DAG.getCondCode(CC)})) {
10643 {Op0, Op1, DAG.getCondCode(CC)})) {
10652// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B 10653// -> Y = (A < B) ? B : A 10654// -> Y = (A >= B) ? A : B 10655// -> Y = (A <= B) ? B : A 10671unsigned Opcode = Node->getOpcode();
10674EVT VT =
LHS.getValueType();
10677assert(VT ==
RHS.getValueType() &&
"Expected operands to be the same type");
10680// usub.sat(a, b) -> umax(a, b) - b 10686// uadd.sat(a, b) -> umin(a, ~b) + b 10693unsigned OverflowOp;
10708llvm_unreachable(
"Expected method to receive signed or unsigned saturation " 10709"addition or subtraction node.");
10712// FIXME: Should really try to split the vector in case it's legal on a 10720SDValue SumDiff = Result.getValue(0);
10721SDValue Overflow = Result.getValue(1);
10727// (LHS + RHS) | OverflowMask 10731// Overflow ? 0xffff.... : (LHS + RHS) 10737// (LHS - RHS) & ~OverflowMask 10742// Overflow ? 0 : (LHS - RHS) 10743return DAG.
getSelect(dl, VT, Overflow, Zero, SumDiff);
10753// If either of the operand signs are known, then they are guaranteed to 10754// only saturate in one direction. If non-negative they will saturate 10755// towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN. 10757// In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the 10758// sign of 'y' has to be flipped. 10763if (LHSIsNonNegative || RHSIsNonNegative) {
10765return DAG.
getSelect(dl, VT, Overflow, SatMax, SumDiff);
10771if (LHSIsNegative || RHSIsNegative) {
10773return DAG.
getSelect(dl, VT, Overflow, SatMin, SumDiff);
10777// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff 10783return DAG.
getSelect(dl, VT, Overflow, Result, SumDiff);
10787unsigned Opcode = Node->getOpcode();
10790EVT VT =
LHS.getValueType();
10791EVT ResVT = Node->getValueType(0);
10800// We can't perform arithmetic on i1 values. Extending them would 10801// probably result in worse codegen, so let's just use two selects instead. 10802// Some targets are also just better off using selects rather than subtraction 10803// because one of the conditions can be merged with one of the selects. 10804// And finally, if we don't know the contents of high bits of a boolean value 10805// we can't perform any arithmetic either. 10822unsigned Opcode = Node->getOpcode();
10826EVT VT =
LHS.getValueType();
10831"Expected a SHLSAT opcode");
10832assert(VT ==
RHS.getValueType() &&
"Expected operands to be the same type");
10838// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. 10864EVT VT =
LHS.getValueType();
10865assert(
RHS.getValueType() == VT &&
"Mismatching operand types");
10867assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
10869"Signed flag should only be set when HiLHS and RiRHS are null");
10871// We'll expand the multiplication by brute force because we have no other 10872// options. This is a trivially-generalized version of the code from 10873// Hacker's Delight (itself derived from Knuth's Algorithm M from section 10874// 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate 10875// sign bits while calculating the Hi half. 10877unsigned HalfBits = Bits / 2;
10886// This is always an unsigned shift. 10908// If HiLHS and HiRHS are set, multiply them by the opposite low part and add 10909// the products to Hi. 10922EVT VT =
LHS.getValueType();
10923assert(
RHS.getValueType() == VT &&
"Mismatching operand types");
// We can fall back to a libcall with an illegal type for the MUL if we
// have a libcall big enough.
if (WideVT == MVT::i16)
  LC = RTLIB::MUL_I16;
else if (WideVT == MVT::i32)
  LC = RTLIB::MUL_I32;
else if (WideVT == MVT::i64)
  LC = RTLIB::MUL_I64;
else if (WideVT == MVT::i128)
  LC = RTLIB::MUL_I128;

// The high part is obtained by SRA'ing all but one of the bits of low
// part.

// Attempt a libcall.
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
  // Halves of WideVT are packed into registers in different order
  // depending on platform endianness. This is usually handled by
  // the C calling convention, but we can't defer to it in the legalizer.
  SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
  Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
  SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
  Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
       "Ret value is a collection of constituent nodes holding result.");
if (DAG.getDataLayout().isLittleEndian()) {
  Lo = Ret.getOperand(0);
  Hi = Ret.getOperand(1);
} else {
  Lo = Ret.getOperand(1);
  Hi = Ret.getOperand(0);
}
assert((Opcode == ISD::SMULFIX || Opcode == ISD::UMULFIX ||
        Opcode == ISD::SMULFIXSAT || Opcode == ISD::UMULFIXSAT) &&
       "Expected a fixed point multiplication opcode");

EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);

// [us]mul.fix(a, b, 0) -> mul(a, b)
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);

// Xor the inputs, if resulting sign bit is 0 the product will be
// positive, else negative.
Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);

SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
return DAG.getSelect(dl, VT, Overflow, SatMax, Product);

assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
       "Expected scale to be less than the number of bits if signed or at "
       "most the number of bits if unsigned.");
assert(LHS.getValueType() == RHS.getValueType() &&
       "Expected both operands to be the same type");

// Get the upper and lower bits of the result.
Lo = Result.getValue(0);
Hi = Result.getValue(1);

// Try for a multiplication using a wider type.

if (Scale == VTSize)
  // Result is just the top half since we'd be shifting by the width of the
  // operand. Overflow impossible so this works for both UMULFIX and
  // UMULFIXSAT.
  return Hi;

// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.

// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
// widened multiplication) aren't all zeroes.
// Saturate to max if ((Hi >> Scale) != 0),
// which is the same as if (Hi > ((1 << Scale) - 1))

// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
// widened multiplication) aren't all ones or all zeroes.
// Saturate to SatMin if wide product is negative, and SatMax if wide
// product is positive ...
// ... but only if we overflowed.
return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);

// We handled Scale==0 above so all the bits to examine are in Hi.
// Saturate to max if ((Hi >> (Scale - 1)) > 0),
// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
// Saturate to min if ((Hi >> (Scale - 1)) < -1),
// which is the same as if (Hi < (-1 << (Scale - 1)))
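// Illustrative sketch (hypothetical helper): the widened multiply and
// rescale at the heart of the expansion above, for 32-bit unsigned
// operands with Scale < 32.
static inline unsigned umulfix32(unsigned A, unsigned B, unsigned Scale) {
  // Both operands carry Scale fractional bits, so their full product
  // carries 2*Scale; shifting right by Scale restores the format. The
  // saturating variants additionally check the discarded high bits.
  unsigned long long Wide = (unsigned long long)A * B;
  return (unsigned)(Wide >> Scale);
}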
11148EVT VT =
LHS.getValueType();
11153// If there is enough room in the type to upscale the LHS or downscale the 11154// RHS before the division, we can perform it in this type without having to 11155// resize. For signed operations, the LHS headroom is the number of 11156// redundant sign bits, and for unsigned ones it is the number of zeroes. 11157// The headroom for the RHS is the number of trailing zeroes. 11162// For signed saturating operations, we need to be able to detect true integer 11163// division overflow; that is, when you have MIN / -EPS. However, this 11164// is undefined behavior and if we emit divisions that could take such 11165// values it may cause undesired behavior (arithmetic exceptions on x86, for 11167// Avoid this by requiring an extra bit so that we never get this case. 11168// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale 11169// signed saturating division, we need to emit a whopping 32-bit division. 11170if (LHSLead + RHSTrail < Scale + (
unsigned)(Saturating &&
Signed))
11173unsigned LHSShift = std::min(LHSLead, Scale);
11174unsigned RHSShift = Scale - LHSShift;
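// Illustrative sketch (hypothetical helper) of the rescaling just computed:
// shift the dividend up by LHSShift and the divisor down by RHSShift, then
// divide. It assumes the headroom check above passed, i.e. the divisor
// really has RHSShift trailing zero bits and the shifted dividend fits.
static inline unsigned udivfix32(unsigned A, unsigned B, unsigned LHSShift,
                                 unsigned RHSShift) {
  return (A << LHSShift) / (B >> RHSShift);
}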
// At this point, we know that if we shift the LHS up by LHSShift and the
// RHS down by RHSShift, we can emit a regular division with a final scaling
// factor of Scale.

// For signed operations, if the resulting quotient is negative and the
// remainder is nonzero, subtract 1 from the quotient to round towards
// negative infinity.

// FIXME: Ideally we would always produce an SDIVREM here, but if the
// type isn't legal, SDIVREM cannot be expanded. There is no reason why
// we couldn't just form a libcall, but the type legalizer doesn't do it.

// If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
SDValue NodeCarry =
    DAG.getNode(OpcCarry, dl, Node->getVTList(), {LHS, RHS, CarryIn});
EVT ResultType = Node->getValueType(1);

// Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
// the live range of X. We assume comparing with 0 is cheap.
// The general case (X + C) < C is not necessarily beneficial. Although we
// reduce the live range of X, we may introduce the materialization of
// constant C.
SetCC = DAG.getSetCC(dl, SetCCType, Result,
                     DAG.getConstant(0, dl, Node->getValueType(0)),
                     ISD::SETEQ);

// Special case: uaddo X, -1 overflows if X != 0.
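// Illustrative sketch (hypothetical helper): the scalar checks behind the
// two special cases above and the generic unsigned-add overflow test.
static inline bool uaddo32(unsigned X, unsigned Y, unsigned &Sum) {
  Sum = X + Y;
  if (Y == 1)   return Sum == 0; // uaddo(X, 1): overflowed iff X+1 wraps to 0
  if (Y == ~0u) return X != 0;   // uaddo(X, -1): overflows iff X != 0
  return Sum < X;                // general case: the sum wrapped below X
}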
11286// If SADDSAT/SSUBSAT is legal, compare results to detect overflow. 11297// For an addition, the result should be less than one of the operands (LHS) 11298// if and only if the other operand (RHS) is negative, otherwise there will 11300// For a subtraction, the result should be less than one of the operands 11301// (LHS) if and only if the other operand (RHS) is (non-zero) positive, 11302// otherwise there will be overflow. 11308 DAG.
getNode(
ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11309 ResultType, ResultType);
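// Illustrative sketch (hypothetical helper): the XOR form built above. For
// an addition the sum is below LHS exactly when RHS is negative, so a
// mismatch between the two tests signals overflow.
static inline bool saddo32(int LHS, int RHS, int &Sum) {
  Sum = (int)((unsigned)LHS + (unsigned)RHS); // wrapping add
  bool ResultLowerThanLHS = Sum < LHS;
  bool ConditionRHS = RHS < 0; // for a subtraction this would be RHS > 0
  return ConditionRHS != ResultLowerThanLHS; // the XOR of the two conditions
}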
EVT VT = Node->getValueType(0);

// For power-of-two multiplications we can use a simpler shift expansion.
const APInt &C = RHSC->getAPIntValue();
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
if (C.isPowerOf2()) {
  // smulo(x, signed_min) is same as umulo(x, signed_min).
  bool UseArithShift = isSigned && !C.isMinSignedValue();
  Overflow = DAG.getSetCC(dl, SetCCVT,
                          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                      dl, VT, Result, ShiftAmt),
                          LHS, ISD::SETNE);
}
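// Illustrative sketch (hypothetical helper): the power-of-two shortcut
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X } used above, in the
// unsigned case with S < 32.
static inline bool umulo_pow2_32(unsigned X, unsigned S, unsigned &Result) {
  Result = X << S;
  return (Result >> S) != X; // any bit shifted out means overflow
}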
static const unsigned Ops[2][3] =
    {{ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND},
     {ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND}};

forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
Result = BottomHalf;
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                        DAG.getConstant(0, dl, VT), ISD::SETNE);

// Truncate the result if SetCC returns a larger type than needed.
EVT RType = Node->getValueType(1);
assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
       "Unexpected result type for S/UMULO legalization");
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
       "Expanding reductions for scalable vectors is undefined.");

// Try to use a shuffle reduction for power of two vectors.
Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());

for (unsigned i = 1; i < NumElts; i++)
  Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

// Result type may be wider than element type.
if (EltVT != Node->getValueType(0))
  Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
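// Illustrative sketch (hypothetical helper): the shuffle reduction above,
// expressed on a plain array. Each round combines the low and high halves
// until one element remains; NumElts must be a power of two (<= 64 here).
static unsigned reduceAdd(const unsigned *Elts, unsigned NumElts) {
  unsigned Buf[64];
  for (unsigned I = 0; I < NumElts; ++I)
    Buf[I] = Elts[I];
  while (NumElts > 1) {
    NumElts /= 2; // split into Lo and Hi halves
    for (unsigned I = 0; I < NumElts; ++I)
      Buf[I] = Buf[I] + Buf[I + NumElts]; // BaseOpcode applied to Lo, Hi
  }
  return Buf[0];
}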
SDValue AccOp = Node->getOperand(0);
SDValue VecOp = Node->getOperand(1);
assert(!VT.isScalableVector() &&
       "Expanding reductions for scalable vectors is undefined.");

for (unsigned i = 0; i < NumElts; i++)
  Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
EVT VT = Node->getValueType(0);
SDValue Dividend = Node->getOperand(0);
SDValue Divisor = Node->getOperand(1);
Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
SDValue Src = Node->getOperand(0);

// DstVT is the result type, while SatVT is the size to which we saturate.
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
assert(SatWidth <= DstWidth &&
       "Expected saturation width smaller than result width");

// Determine minimum and maximum integer values and their corresponding
// floating-point values.
APInt MinInt, MaxInt;

// We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
// libcall emission cannot handle this. Large result types will fail.
if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
  SrcVT = Src.getValueType();
}

// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
if (AreExactFloatBounds && MinMaxLegal) {
  // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
  // Clamp by MaxFloat from above. NaN cannot occur.
  // Convert clamped value to integer.
  SDValue FpToInt = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                dl, DstVT, Clamped);
  // In the unsigned case we're done, because we mapped NaN to MinFloat,
  // which will cast to zero.
  // Otherwise, select 0 if Src is NaN.
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
}

// Result of direct conversion. The assumption here is that the operation is
// non-trapping and it's fine to apply it to an out-of-range value if we
// select it away later.

// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.

// If Src OGT MaxFloat, select MaxInt.

// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is zero.

// Otherwise, select 0 if Src is NaN.
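// Illustrative sketch (hypothetical helper): the compare-and-select form
// described above for a saturating f64 -> i32 conversion, where both
// bounds are exactly representable.
static inline int fptosi_sat_i32(double Src) {
  const double MinFloat = -2147483648.0; // INT32_MIN, exact in double
  const double MaxFloat = 2147483647.0;  // INT32_MAX, exact in double
  // Direct conversion; out-of-range and NaN results are selected away
  // below, so guard the cast to keep the sketch free of UB.
  int FpToInt = (Src >= MinFloat && Src <= MaxFloat) ? (int)Src : 0;
  if (!(Src >= MinFloat)) // Src ULT MinFloat, also true for NaN
    FpToInt = -2147483647 - 1;
  if (Src > MaxFloat)     // Src OGT MaxFloat
    FpToInt = 2147483647;
  if (Src != Src)         // otherwise, select 0 if Src is NaN
    FpToInt = 0;
  return FpToInt;
}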
Op.getValueType();
11600// We are rounding binary64/binary128 -> binary32 -> bfloat16. This 11601// can induce double-rounding which may alter the results. We can 11602// correct for this using a trick explained in: Boldo, Sylvie, and 11603// Guillaume Melquiond. "When double rounding is odd." 17th IMACS 11604// World Congress. 2005. 11618 AbsWide = DAG.
getBitcast(OperandVT, ClearedSign);
11623// We can keep the narrow value as-is if narrowing was exact (no 11624// rounding error), the wide value was NaN (the narrow value is also 11625// NaN and should be preserved) or if we rounded to the odd value. 11633// The result is already odd so we don't need to do anything. 11638// We keep results which are exact, odd or NaN. 11641 KeepNarrow = DAG.
getNode(
ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11642// We morally performed a round-down if AbsNarrow is smaller than 11646// If the narrow value is odd or exact, pick it. 11647// Otherwise, narrow is even and corresponds to either the rounded-up 11648// or rounded-down value. If narrow is the rounded-down value, we want 11649// the rounded-up value as it will be odd. 11650SDValue Adjust = DAG.
getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11652Op = DAG.
getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
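// Illustrative sketch (hypothetical helper, positive inputs only for
// brevity): the round-to-odd step above. Keep the narrowed value when the
// narrowing was exact, the input was NaN, or the result is already odd;
// otherwise nudge one ulp toward the wide value so the low bit becomes 1.
static inline float roundToOddF32(double Wide) {
  float Narrow = (float)Wide; // rounds to nearest-even
  unsigned Bits;
  std::memcpy(&Bits, &Narrow, sizeof(Bits)); // needs <cstring>
  bool IsNaN = Wide != Wide;
  bool Exact = !IsNaN && (double)Narrow == Wide;
  if (Exact || IsNaN || (Bits & 1))
    return Narrow;
  // We "morally" rounded down if the narrow value is below the wide one;
  // the odd neighbour is then one ulp up, otherwise one ulp down.
  Bits += ((double)Narrow < Wide) ? 1u : ~0u; // ~0u is -1 in wrapping math
  std::memcpy(&Narrow, &Bits, sizeof(Narrow));
  return Narrow;
}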
EVT VT = Node->getValueType(0);
if (Node->getConstantOperandVal(1) == 1) {

EVT OperandVT = Op.getValueType();

// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
// can induce double-rounding which may alter the results. We can
// correct for this using a trick explained in: Boldo, Sylvie, and
// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
// World Congress. 2005.
EVT I32 = F32.changeTypeToInteger();
Op = expandRoundInexactToOdd(F32, Op, dl, DAG);

// Conversions should set NaN's quiet bit. This also prevents NaNs from
// turning into infinities.

// Factor in the contribution of the low 16 bits.

// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
// 0x80000000.

// Now that we have rounded, shift the bits into position.
EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
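// Illustrative sketch (hypothetical helper): the integer rounding step
// above for f32 -> bfloat16. Add the round-to-nearest-even bias, shift the
// bits into position, and quiet NaNs instead of rounding them.
static inline unsigned short f32ToBF16(float F) {
  unsigned Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // needs <cstring>
  if (F != F)
    return (unsigned short)((Bits >> 16) | 0x0040); // set NaN's quiet bit
  Bits += 0x7FFF + ((Bits >> 16) & 1); // ties go to the even result
  return (unsigned short)(Bits >> 16);
}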
assert(Node->getValueType(0).isScalableVector() &&
       "Fixed length vector types expected to use SHUFFLE_VECTOR!");

EVT VT = Node->getValueType(0);
SDValue V1 = Node->getOperand(0);
SDValue V2 = Node->getOperand(1);
int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();

// Expand through memory thusly:
//  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
//  Store V1, Ptr
//  Store V2, Ptr + sizeof(V1)
//  If (Imm < 0)
//    TrailingElts = -Imm
//    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
//  else
//    Ptr = Ptr + (Imm * sizeof(VT.Elt))
//  Res = Load Ptr

EVT PtrVT = StackPtr.getValueType();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

// Store the lo part of CONCAT_VECTORS(V1, V2)
// Store the hi part of CONCAT_VECTORS(V1, V2)

// Load back the required element. getVectorElementPointer takes care of
// clamping the index if it's out-of-bounds.
StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));

// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                   MachinePointerInfo::getFixedStack(MF, FrameIndex));

// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.

// Calculate the start address of the spliced result.

// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                   MachinePointerInfo::getFixedStack(MF, FrameIndex));
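// Illustrative sketch (hypothetical helper): the store/load splice above on
// a plain scratch buffer. Positive Imm selects a leading offset; negative
// Imm selects -Imm trailing elements of V1, clamped to stay inside V1:V2.
static void splice(const int *V1, const int *V2, int *Out, int NumElts,
                   int Imm) {
  int Buf[128]; // assumes NumElts <= 64
  for (int I = 0; I < NumElts; ++I)
    Buf[I] = V1[I]; // store the lo part
  for (int I = 0; I < NumElts; ++I)
    Buf[NumElts + I] = V2[I]; // store the hi part
  int Offset = Imm >= 0 ? Imm : NumElts + Imm;
  if (Offset < 0)
    Offset = 0; // clamp, like getVectorElementPointer
  if (Offset > NumElts)
    Offset = NumElts;
  for (int I = 0; I < NumElts; ++I)
    Out[I] = Buf[Offset + I]; // load the spliced result
}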
SDValue Vec = Node->getOperand(0);
SDValue Mask = Node->getOperand(1);
SDValue Passthru = Node->getOperand(2);
EVT MaskVT = Mask.getValueType();

// Needs to be handled by targets that have scalable vector types.
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

bool HasPassthru = !Passthru.isUndef();

// If we have a passthru vector, store it on the stack, overwrite the matching
// positions and then re-write the last element that was potentially
// overwritten even though mask[i] = false.
Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);

APInt PassthruSplatVal;
bool IsSplatPassthru =
    ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);

if (IsSplatPassthru) {
  // As we do not know which position we wrote to last, we cannot simply
  // access that index from the passthru vector. So we first check if passthru
  // is a splat vector, to use any element ...
  LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
} else if (HasPassthru) {
  // ... if it is not a splat vector, we need to get the passthru value at
  // position = popcount(mask) and re-load it from the stack before it is
  // overwritten in the loop below.
  SDValue LastElmtPtr =
      getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
  LastWriteVal = DAG.getLoad(ScalarVT, DL, Chain, LastElmtPtr, PtrInfo);
}

for (unsigned I = 0; I < NumElms; I++) {
  SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
  Chain = DAG.getStore(Chain, DL, ValI, OutPtr, PtrInfo);

  // Get the mask value and add it to the current output position. This
  // either increments by 1 if MaskI is true or adds 0 otherwise.
  // Freeze in case we have poison/undef mask entries.

  if (HasPassthru && I == NumElms - 1) {
    OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);

    // Re-write the last ValI if all lanes were selected. Otherwise,
    // overwrite the last write with the passthru value.
    LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
                                 LastWriteVal);
    Chain = DAG.getStore(Chain, DL, LastWriteVal, OutPtr, PtrInfo);
  }
}

return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
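// Illustrative sketch (hypothetical helper): the compress loop above on a
// plain buffer. Every lane is stored unconditionally at the current output
// position, which only advances for selected lanes; the slot right after
// the last selected lane is then restored from the passthru.
static void compress(const int *Vec, const bool *Mask, const int *Passthru,
                     int *Out, unsigned NumElts) {
  for (unsigned I = 0; I < NumElts; ++I)
    Out[I] = Passthru[I]; // store the passthru first
  unsigned OutPos = 0;
  for (unsigned I = 0; I < NumElts; ++I) {
    Out[OutPos] = Vec[I];      // unconditional store, as in the loop above
    OutPos += Mask[I] ? 1 : 0; // advance only for selected lanes
  }
  // Unless every lane was selected, position popcount(Mask) may have been
  // clobbered by an unselected store; re-write it with the passthru value.
  if (OutPos < NumElts)
    Out[OutPos] = Passthru[OutPos];
}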
                                       bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;

  // Swapping operands didn't work. Try inverting the condition.
  bool NeedSwap = false;
  InvCC = getSetCCInverse(CCCode, OpVT);

  // If inverting the condition is not enough, try swapping operands.

  // Special case: expand i1 comparisons using logical operations.
  if (OpVT == MVT::i1) {
    switch (CCCode) {
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y

    case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X

    case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y

    case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
    }
  }

  assert((isCondCodeLegal(ISD::SETOEQ, OpVT) ||
          isCondCodeLegal(ISD::SETUNE, OpVT)) &&
         "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
  assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
         "If SETO is expanded, SETOEQ must be legal!");

  // If the SETUO or SETO CC isn't legal, we might be able to use
  // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
  // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
  // operands.
  NeedInvert = ((unsigned)CCCode & 0x8U);

  // If we are floating point, assign and break, otherwise fall through.
  // We can use the 4th bit to tell if we are the unordered
  // or ordered version of the opcode.

  // Fallthrough if we are unsigned integer.

  // If all combinations of inverting the condition and swapping operands
  // didn't work then we have no means to expand the condition.

  // If we aren't the ordered or unordered operation,
  // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
  LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);

  // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)

  // Transform the binary opcode to the VP equivalent.
  Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
  LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
EVT VT = Node->getValueType(0);

// Despite its documentation, GetSplitDestVTs will assert if VT cannot be
// split into two equal parts.

// Restrict expansion to cases where both parts can be concatenated.

unsigned Opcode = Node->getOpcode();

// Don't expand if the result is likely to be unrolled anyway.

for (const SDValue &V : Node->op_values()) {
unsigned const MachineRegisterInfo * MRI
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isUndef(const MachineInstr &MI)
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Function const char * Passes
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
APInt udiv(const APInt &RHS) const
Unsigned division operation.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
void setSignBit()
Set the sign bit to 1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
void clearAllBits()
Set every bit to 0.
APInt reverseBits() const
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
void negate()
Negate this APInt in place.
unsigned countr_zero() const
Count the number of trailing zero bits.
unsigned countl_zero() const
The APInt version of std::countl_zero.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
unsigned countLeadingZeros() const
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
unsigned logBase2() const
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
void setAllBits()
Set every bit to 1.
APInt multiplicativeInverse() const
bool isMaxSignedValue() const
Determine if this is the largest signed value.
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
APInt sext(unsigned width) const
Sign extend to a new width.
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
APInt shl(unsigned shiftAmt) const
Left-shift function.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
bool isOne() const
Determine if this is a value of 1.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
int64_t getSExtValue() const
Get sign extended value.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
unsigned countr_one() const
Count the number of trailing one bits.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
ConstantFP - Floating Point Values [float, double].
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
bool isLittleEndian() const
Layout endianness...
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
int64_t getOffset() const
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Base class for the full range of assembler expressions which are needed for parsing.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Class to represent pointers.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Class to represent struct types.
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
@ ZeroOrOneBooleanContent
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op: simplify it given that only the elements in DemandedEltMask are ever used, reporting which lanes are known undef or zero in KnownUndef and KnownZero.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
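A minimal usage sketch, assuming a hypothetical target class MyTargetLowering and choosing RTLIB::ADD_F32 purely for illustration: soften an f32 add into a libcall and return the value half of the pair.

// Sketch: lower FADD(f32) via compiler-rt's __addsf3.
SDValue MyTargetLowering::softenFADD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  TargetLowering::MakeLibCallOptions CallOptions;
  // first = return value, second = output chain (unused for an unchained op).
  std::pair<SDValue, SDValue> Tmp =
      makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
  return Tmp.first;
}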
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
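A hedged override sketch (MyTargetLowering and the 32-byte v4i64 heuristic are hypothetical): seed MemOps with a preferred type for the sizes you care about and defer everything else to the generic cost model.

bool MyTargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Hypothetical: cover an exactly-32-byte copy with a single 256-bit op.
  if (Op.size() == 32 && Limit >= 1) {
    MemOps.push_back(MVT::v4i64);
    return true;
  }
  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                  SrcAS, FuncAttributes);
}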
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
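The usual call pattern, sketched with a hypothetical MyTargetLowering: try the expansion first and return the empty SDValue to fall back to generic legalization (typically a runtime libcall).

SDValue MyTargetLowering::lowerSREM(SDValue Op, SelectionDAG &DAG) const {
  SDValue Result;
  // Succeeds only if a suitable SDIV/UDIV or SDIVREM/UDIVREM is available.
  if (expandREM(Op.getNode(), Result, DAG))
    return Result;
  return SDValue(); // let the default legalizer handle it
}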
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product of LHS and RHS at twice their width.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known never to be any NaN; if SNaN is true, returns true if Op is known never to be a signaling NaN.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float to SINT conversion (e.g. f32 to i64).
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and register class for it.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op: simplify it given that only the DemandedBits bits of its result are ever used, recording what is known about the result in Known.
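Typical use from inside PerformDAGCombine, sketched; the 8-bit demand is illustrative. TargetLoweringOpt queues the replacement and DAGCombinerInfo commits it.

// Assume only the low 8 bits of N's first operand feed this node.
SDValue Src = N->getOperand(0);
APInt Demanded = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(), 8);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                      !DCI.isBeforeLegalizeOps());
if (SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
  DCI.CommitTargetLoweringOpt(TLO); // apply the queued rewrite
  return SDValue(N, 0);
}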
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT using round-to-odd, so that a subsequent rounding cannot double-round.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
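A sketch of re-running this fold from a target's PerformDAGCombine on an ISD::SETCC node:

if (N->getOpcode() == ISD::SETCC) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (SDValue V = SimplifySetCC(N->getValueType(0), N->getOperand(0),
                                N->getOperand(1), CC, /*foldBooleans=*/true,
                                DCI, SDLoc(N)))
    return V;
}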
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT to float conversion (e.g. i64 to f64).
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
@ SMULO
Same for multiplication.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
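For example, the two helpers compose like this (the i32 type is illustrative):

ISD::CondCode CC   = ISD::SETULT;
ISD::CondCode Inv  = ISD::getSetCCInverse(CC, MVT::i32);  // SETUGE: !(x <u y)
ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);    // SETUGT: y >u x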
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates. Return true if N is a BUILD_VECTOR or SPLAT_VECTOR whose elements are all the same constant (or undef), setting SplatValue to that constant.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. fewer instructions would be required to lower it).
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
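These two helpers usually pair up when strength-reducing a multiply by a constant into a shift:

uint32_t C = 64;
if (llvm::isPowerOf2_32(C)) {
  unsigned ShAmt = llvm::Log2_32(C); // 6, since x * 64 == x << 6
  // ... emit ISD::SHL by ShAmt instead of ISD::MUL ...
}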
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
constexpr unsigned BitWidth
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
unsigned Log2(Align A)
Returns the log2 of the alignment.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
bool isIndirect
isIndirect - True if this operand is an indirect operand.
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
bool isZero() const
Returns true if value is all zero.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
bool isUnknown() const
Returns true if we don't know any bits.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
unsigned getBitWidth() const
Get the bit width of this value.
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
void resetAll()
Resets the known state of all bits.
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
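A small self-contained sketch of the comparison helpers above: partial bit knowledge can already decide an unsigned compare.

#include "llvm/Support/KnownBits.h"
#include <optional>
using namespace llvm;

// A value with its top bit known zero (< 128) is always ULT a value with its
// top bit known one (>= 128), whatever the remaining bits turn out to be.
bool knownUltExample() {
  KnownBits Small(8), Big(8);
  Small.Zero.setBit(7);
  Big.One.setBit(7);
  std::optional<bool> R = KnownBits::ult(Small, Big);
  return R.has_value() && *R; // true: the compare is fully decided
}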
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
unsigned ShiftAmount
shift amount
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
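For instance (the divisor 7 is illustrative), BuildSDIV consumes this data roughly as a multiply-high followed by shift/fixup, per "Hacker's Delight":

SignedDivisionByConstantInfo Magics =
    SignedDivisionByConstantInfo::get(APInt(32, 7));
// quotient ~ (mulhs(x, Magics.Magic) + fixups) >> Magics.ShiftAmount
APInt Multiplier = Magics.Magic;
unsigned PostShift = Magics.ShiftAmount;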
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
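The setters chain fluently; a sketch of the setup LowerCallTo expects, assuming Args, Callee, RetTy, Chain, ShouldSignExt and dl were built beforehand:

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult(ShouldSignExt)
    .setZExtResult(!ShouldSignExt);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);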
bool isBeforeLegalizeOps() const
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
ArrayRef< EVT > OpsVTBeforeSoften
bool IsPostTypeLegalization
MakeLibCallOptions & setIsSigned(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
bool LegalOperations() const
Magic data for optimising unsigned division by a constant.
unsigned PreShift
pre-shift amount
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
unsigned PostShift
post-shift amount
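Mirroring the signed variant above, a sketch for unsigned division by 10 (the constant is illustrative; IsAdd is a member not shown in this listing):

UnsignedDivisionByConstantInfo Magics =
    UnsignedDivisionByConstantInfo::get(APInt(32, 10));
// BuildUDIV emits roughly:
//   q = ((x >> Magics.PreShift) mulhu Magics.Magic) >> Magics.PostShift
// with an extra add-based fixup when Magics.IsAdd is set.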