Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/Analysis/VectorUtils.h"
17#include "llvm/CodeGen/CallingConvLower.h"
18#include "llvm/CodeGen/CodeGenCommonISel.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/CodeGen/MachineJumpTableInfo.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/TargetRegisterInfo.h"
25#include "llvm/IR/DataLayout.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/GlobalVariable.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/Support/DivisionByConstantInfo.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/KnownBits.h"
34#include "llvm/Support/MathExtras.h"
35#include "llvm/Target/TargetMachine.h"
36#include <cctype>
37using namespacellvm;
38
/// NOTE: The TargetMachine owns TLOF.
///
/// Trivial constructor: all state lives in TargetLoweringBase; this class
/// only layers behavior (lowering hooks) on top of it.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42
/// Default implementation: targets that define machine-specific ISD nodes
/// override this to name them for debug dumps. Returning nullptr means
/// "no target-specific name for this opcode".
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46
/// Convenience forwarder: reports whether the owning TargetMachine was
/// configured for position-independent code.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
53boolTargetLowering::isInTailCallPosition(SelectionDAG &DAG,SDNode *Node,
54SDValue &Chain) const{
55constFunction &F = DAG.getMachineFunction().getFunction();
56
57// First, check if tail calls have been disabled in this function.
58if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59returnfalse;
60
61// Conservatively require the attributes of the call to match those of
62// the return. Ignore following attributes because they don't affect the
63// call sequence.
64AttrBuilder CallerAttrs(F.getContext(),F.getAttributes().getRetAttrs());
65for (constauto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef,
68 Attribute::Range, Attribute::NoFPClass})
69 CallerAttrs.removeAttribute(Attr);
70
71if (CallerAttrs.hasAttributes())
72returnfalse;
73
74// It's not safe to eliminate the sign / zero extension of the return value.
75if (CallerAttrs.contains(Attribute::ZExt) ||
76 CallerAttrs.contains(Attribute::SExt))
77returnfalse;
78
79// Check if the only use is a function return node.
80returnisUsedByReturnOnly(Node, Chain);
81}
82
83boolTargetLowering::parametersInCSRMatch(constMachineRegisterInfo &MRI,
84constuint32_t *CallerPreservedMask,
85constSmallVectorImpl<CCValAssign> &ArgLocs,
86constSmallVectorImpl<SDValue> &OutVals) const{
87for (unsignedI = 0, E = ArgLocs.size();I != E; ++I) {
88constCCValAssign &ArgLoc = ArgLocs[I];
89if (!ArgLoc.isRegLoc())
90continue;
91MCRegister Reg = ArgLoc.getLocReg();
92// Only look at callee saved registers.
93if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94continue;
95// Check that we pass the value used for the caller.
96// (We look for a CopyFromReg reading a virtual register that is used
97// for the function live-in value of register Reg)
98SDValueValue = OutVals[I];
99if (Value->getOpcode() ==ISD::AssertZext)
100Value =Value.getOperand(0);
101if (Value->getOpcode() !=ISD::CopyFromReg)
102returnfalse;
103Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105returnfalse;
106 }
107returntrue;
108}
109
110/// Set CallLoweringInfo attribute flags based on a call instruction
111/// and called function attributes.
112voidTargetLoweringBase::ArgListEntry::setAttributes(constCallBase *Call,
113unsigned ArgIdx) {
114IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
117IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
118IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
119IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
120IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
121IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
122IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
123IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
124IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
125IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
126IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
127Alignment = Call->getParamStackAlign(ArgIdx);
128IndirectType =nullptr;
129assert(IsByVal +IsPreallocated +IsInAlloca +IsSRet <= 1 &&
130"multiple ABI attributes?");
131if (IsByVal) {
132IndirectType = Call->getParamByValType(ArgIdx);
133if (!Alignment)
134Alignment = Call->getParamAlign(ArgIdx);
135 }
136if (IsPreallocated)
137IndirectType = Call->getParamPreallocatedType(ArgIdx);
138if (IsInAlloca)
139IndirectType = Call->getParamInAllocaType(ArgIdx);
140if (IsSRet)
141IndirectType = Call->getParamStructRetType(ArgIdx);
142}
143
144/// Generate a libcall taking the given operands as arguments and returning a
145/// result of type RetVT.
146std::pair<SDValue, SDValue>
147TargetLowering::makeLibCall(SelectionDAG &DAG,RTLIB::Libcall LC,EVT RetVT,
148ArrayRef<SDValue> Ops,
149MakeLibCallOptions CallOptions,
150constSDLoc &dl,
151SDValue InChain) const{
152if (!InChain)
153 InChain = DAG.getEntryNode();
154
155TargetLowering::ArgListTy Args;
156 Args.reserve(Ops.size());
157
158TargetLowering::ArgListEntry Entry;
159for (unsigned i = 0; i < Ops.size(); ++i) {
160SDValue NewOp = Ops[i];
161 Entry.Node = NewOp;
162 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163 Entry.IsSExt =
164shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
165 Entry.IsZExt = !Entry.IsSExt;
166
167if (CallOptions.IsSoften &&
168 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
169 Entry.IsSExt = Entry.IsZExt =false;
170 }
171 Args.push_back(Entry);
172 }
173
174if (LC == RTLIB::UNKNOWN_LIBCALL)
175report_fatal_error("Unsupported library call operation!");
176SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
177getPointerTy(DAG.getDataLayout()));
178
179Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180TargetLowering::CallLoweringInfo CLI(DAG);
181bool signExtend =shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
182bool zeroExtend = !signExtend;
183
184if (CallOptions.IsSoften &&
185 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
186 signExtend = zeroExtend =false;
187 }
188
189 CLI.setDebugLoc(dl)
190 .setChain(InChain)
191 .setLibCallee(getLibcallCallingConv(LC),RetTy, Callee, std::move(Args))
192 .setNoReturn(CallOptions.DoesNotReturn)
193 .setDiscardResult(!CallOptions.IsReturnValueUsed)
194 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
195 .setSExtResult(signExtend)
196 .setZExtResult(zeroExtend);
197return LowerCallTo(CLI);
198}
199
/// Choose a sequence of value types (appended to \p MemOps) with which to
/// lower the memory operation \p Op, using at most \p Limit loads/stores.
/// Returns false if no sequence within the limit could be found (or if a
/// limited memcpy has a fixed destination alignment stricter than its source
/// alignment, which this routine declines to handle).
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference", in which case we search for one ourselves below.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      // Walk down the integer MVTs until one fits the destination alignment
      // or the target tolerates the misaligned access.
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily cover the remaining bytes, shrinking VT whenever it no longer
  // fits what is left.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until a safe one is found
        // (i8 is the unconditional floor).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        // Keep the wide VT and let this op overlap the previous one.
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
289
290/// Soften the operands of a comparison. This code is shared among BR_CC,
291/// SELECT_CC, and SETCC handlers.
292voidTargetLowering::softenSetCCOperands(SelectionDAG &DAG,EVT VT,
293SDValue &NewLHS,SDValue &NewRHS,
294ISD::CondCode &CCCode,
295constSDLoc &dl,constSDValue OldLHS,
296constSDValue OldRHS) const{
297SDValue Chain;
298return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
299 OldRHS, Chain);
300}
301
/// Soften a floating-point comparison by expanding it into one or two
/// soft-float comparison libcalls. On return, NewLHS/NewRHS/CCCode describe
/// the integer comparison that replaces the FP one (NewRHS is a null SDValue
/// when a two-call expansion already produced the final boolean in NewLHS).
/// \p Chain, if non-null, is threaded through the generated call(s).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 stays UNKNOWN_LIBCALL
  // when a single call suffices.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO, for which libgcc has a comparison.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // Two calls: unordered test plus ordered-equal test, merged below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record the pre-soften operand types so makeLibCall picks the right
  // argument extensions.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The libcall result is compared against zero with the CC the libcall
  // semantics dictate.
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call expansion: build setcc for each call result and combine them
    // (AND for the inverted/ordered form, OR for the unordered form).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      // Merge both call chains so neither call is dropped.
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // Null NewRHS signals to the caller that NewLHS is already the final
    // boolean result.
    NewRHS = SDValue();
  }
}
441
442/// Return the entry encoding for a jump table in the current function. The
443/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444unsignedTargetLowering::getJumpTableEncoding() const{
445// In non-pic modes, just use the address of a block.
446if (!isPositionIndependent())
447returnMachineJumpTableInfo::EK_BlockAddress;
448
449// In PIC mode, if the target supports a GPRel32 directive, use it.
450if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() !=nullptr)
451returnMachineJumpTableInfo::EK_GPRel32BlockAddress;
452
453// Otherwise, use a label difference.
454returnMachineJumpTableInfo::EK_LabelDifference32;
455}
456
457SDValueTargetLowering::getPICJumpTableRelocBase(SDValue Table,
458SelectionDAG &DAG) const{
459// If our PIC model is GP relative, use the global offset table as the base.
460unsigned JTEncoding = getJumpTableEncoding();
461
462if ((JTEncoding ==MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
463 (JTEncoding ==MachineJumpTableInfo::EK_GPRel32BlockAddress))
464return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
465
466return Table;
467}
468
469/// This returns the relocation base for the given PIC jumptable, the same as
470/// getPICJumpTableRelocBase, but as an MCExpr.
471constMCExpr *
472TargetLowering::getPICJumpTableRelocBaseExpr(constMachineFunction *MF,
473unsigned JTI,MCContext &Ctx) const{
474// The normal PIC reloc base is the label at the start of the jump table.
475returnMCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
476}
477
478SDValueTargetLowering::expandIndirectJTBranch(constSDLoc &dl,SDValueValue,
479SDValueAddr,int JTI,
480SelectionDAG &DAG) const{
481SDValue Chain =Value;
482// Jump table debug info is only needed if CodeView is enabled.
483if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
484 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485 }
486return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain,Addr);
487}
488
489bool
490TargetLowering::isOffsetFoldingLegal(constGlobalAddressSDNode *GA) const{
491constTargetMachine &TM =getTargetMachine();
492constGlobalValue *GV = GA->getGlobal();
493
494// If the address is not even local to this DSO we will have to load it from
495// a got and then add the offset.
496if (!TM.shouldAssumeDSOLocal(GV))
497returnfalse;
498
499// If the code is position independent we will have to add a base register.
500if (isPositionIndependent())
501returnfalse;
502
503// Otherwise we can do it.
504returntrue;
505}
506
507//===----------------------------------------------------------------------===//
508// Optimization Methods
509//===----------------------------------------------------------------------===//
510
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    // Non-null TLO.New means the target produced a replacement node.
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle a constant (non-opaque) RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // If the constant has bits outside the demanded set, rebuild the node
    // with those bits masked off.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
560
561boolTargetLowering::ShrinkDemandedConstant(SDValueOp,
562constAPInt &DemandedBits,
563TargetLoweringOpt &TLO) const{
564EVT VT =Op.getValueType();
565APInt DemandedElts = VT.isVector()
566 ?APInt::getAllOnes(VT.getVectorNumElements())
567 :APInt(1, 1);
568returnShrinkDemandedConstant(Op,DemandedBits, DemandedElts, TLO);
569}
570
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      // Rebuild the operation at the narrow width: truncate both operands,
      // apply the op, then ANY_EXTEND back to the original type (the upper
      // bits are undemanded, so their value does not matter).
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
624
625boolTargetLowering::SimplifyDemandedBits(SDValueOp,constAPInt &DemandedBits,
626DAGCombinerInfo &DCI) const{
627SelectionDAG &DAG = DCI.DAG;
628TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
629 !DCI.isBeforeLegalizeOps());
630KnownBits Known;
631
632bool Simplified = SimplifyDemandedBits(Op,DemandedBits, Known, TLO);
633if (Simplified) {
634 DCI.AddToWorklist(Op.getNode());
635 DCI.CommitTargetLoweringOpt(TLO);
636 }
637return Simplified;
638}
639
640boolTargetLowering::SimplifyDemandedBits(SDValueOp,constAPInt &DemandedBits,
641constAPInt &DemandedElts,
642DAGCombinerInfo &DCI) const{
643SelectionDAG &DAG = DCI.DAG;
644TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
645 !DCI.isBeforeLegalizeOps());
646KnownBits Known;
647
648bool Simplified =
649 SimplifyDemandedBits(Op,DemandedBits, DemandedElts, Known, TLO);
650if (Simplified) {
651 DCI.AddToWorklist(Op.getNode());
652 DCI.CommitTargetLoweringOpt(TLO);
653 }
654return Simplified;
655}
656
657boolTargetLowering::SimplifyDemandedBits(SDValueOp,constAPInt &DemandedBits,
658KnownBits &Known,
659TargetLoweringOpt &TLO,
660unsignedDepth,
661bool AssumeSingleUse) const{
662EVT VT =Op.getValueType();
663
664// Since the number of lanes in a scalable vector is unknown at compile time,
665// we track one bit which is implicitly broadcast to all lanes. This means
666// that all lanes in a scalable vector are considered demanded.
667APInt DemandedElts = VT.isFixedLengthVector()
668 ?APInt::getAllOnes(VT.getVectorNumElements())
669 :APInt(1, 1);
670return SimplifyDemandedBits(Op,DemandedBits, DemandedElts, Known, TLO,Depth,
671 AssumeSingleUse);
672}
673
674// TODO: Under what circumstances can we create nodes? Constant folding?
675SDValueTargetLowering::SimplifyMultipleUseDemandedBits(
676SDValueOp,constAPInt &DemandedBits,constAPInt &DemandedElts,
677SelectionDAG &DAG,unsignedDepth) const{
678EVT VT =Op.getValueType();
679
680// Limit search depth.
681if (Depth >=SelectionDAG::MaxRecursionDepth)
682returnSDValue();
683
684// Ignore UNDEFs.
685if (Op.isUndef())
686returnSDValue();
687
688// Not demanding any bits/elts from Op.
689if (DemandedBits == 0 || DemandedElts == 0)
690return DAG.getUNDEF(VT);
691
692bool IsLE = DAG.getDataLayout().isLittleEndian();
693unsigned NumElts = DemandedElts.getBitWidth();
694unsignedBitWidth =DemandedBits.getBitWidth();
695KnownBits LHSKnown, RHSKnown;
696switch (Op.getOpcode()) {
697caseISD::BITCAST: {
698if (VT.isScalableVector())
699returnSDValue();
700
701SDValue Src =peekThroughBitcasts(Op.getOperand(0));
702EVT SrcVT = Src.getValueType();
703EVT DstVT =Op.getValueType();
704if (SrcVT == DstVT)
705return Src;
706
707unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
708unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
709if (NumSrcEltBits == NumDstEltBits)
710if (SDValue V = SimplifyMultipleUseDemandedBits(
711 Src,DemandedBits, DemandedElts, DAG,Depth + 1))
712return DAG.getBitcast(DstVT, V);
713
714if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
715unsigned Scale = NumDstEltBits / NumSrcEltBits;
716unsigned NumSrcElts = SrcVT.getVectorNumElements();
717APInt DemandedSrcBits =APInt::getZero(NumSrcEltBits);
718APInt DemandedSrcElts =APInt::getZero(NumSrcElts);
719for (unsigned i = 0; i != Scale; ++i) {
720unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
721unsigned BitOffset = EltOffset * NumSrcEltBits;
722APInt Sub =DemandedBits.extractBits(NumSrcEltBits, BitOffset);
723if (!Sub.isZero()) {
724 DemandedSrcBits |= Sub;
725for (unsigned j = 0; j != NumElts; ++j)
726if (DemandedElts[j])
727 DemandedSrcElts.setBit((j * Scale) + i);
728 }
729 }
730
731if (SDValue V = SimplifyMultipleUseDemandedBits(
732 Src, DemandedSrcBits, DemandedSrcElts, DAG,Depth + 1))
733return DAG.getBitcast(DstVT, V);
734 }
735
736// TODO - bigendian once we have test coverage.
737if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
738unsigned Scale = NumSrcEltBits / NumDstEltBits;
739unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
740APInt DemandedSrcBits =APInt::getZero(NumSrcEltBits);
741APInt DemandedSrcElts =APInt::getZero(NumSrcElts);
742for (unsigned i = 0; i != NumElts; ++i)
743if (DemandedElts[i]) {
744unsignedOffset = (i % Scale) * NumDstEltBits;
745 DemandedSrcBits.insertBits(DemandedBits,Offset);
746 DemandedSrcElts.setBit(i / Scale);
747 }
748
749if (SDValue V = SimplifyMultipleUseDemandedBits(
750 Src, DemandedSrcBits, DemandedSrcElts, DAG,Depth + 1))
751return DAG.getBitcast(DstVT, V);
752 }
753
754break;
755 }
756caseISD::FREEZE: {
757SDValue N0 =Op.getOperand(0);
758if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
759/*PoisonOnly=*/false))
760return N0;
761break;
762 }
763caseISD::AND: {
764 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
765 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
766
767// If all of the demanded bits are known 1 on one side, return the other.
768// These bits cannot contribute to the result of the 'and' in this
769// context.
770if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
771returnOp.getOperand(0);
772if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
773returnOp.getOperand(1);
774break;
775 }
776caseISD::OR: {
777 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
778 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
779
780// If all of the demanded bits are known zero on one side, return the
781// other. These bits cannot contribute to the result of the 'or' in this
782// context.
783if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
784returnOp.getOperand(0);
785if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
786returnOp.getOperand(1);
787break;
788 }
789caseISD::XOR: {
790 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
791 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
792
793// If all of the demanded bits are known zero on one side, return the
794// other.
795if (DemandedBits.isSubsetOf(RHSKnown.Zero))
796returnOp.getOperand(0);
797if (DemandedBits.isSubsetOf(LHSKnown.Zero))
798returnOp.getOperand(1);
799break;
800 }
801caseISD::ADD: {
802 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts,Depth + 1);
803if (RHSKnown.isZero())
804returnOp.getOperand(0);
805
806 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts,Depth + 1);
807if (LHSKnown.isZero())
808returnOp.getOperand(1);
809break;
810 }
811caseISD::SHL: {
812// If we are only demanding sign bits then we can use the shift source
813// directly.
814if (std::optional<uint64_t> MaxSA =
815 DAG.getValidMaximumShiftAmount(Op, DemandedElts,Depth + 1)) {
816SDValue Op0 =Op.getOperand(0);
817unsigned ShAmt = *MaxSA;
818unsigned NumSignBits =
819 DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1);
820unsigned UpperDemandedBits =BitWidth -DemandedBits.countr_zero();
821if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
822return Op0;
823 }
824break;
825 }
826caseISD::SRL: {
827// If we are only demanding sign bits then we can use the shift source
828// directly.
829if (std::optional<uint64_t> MaxSA =
830 DAG.getValidMaximumShiftAmount(Op, DemandedElts,Depth + 1)) {
831SDValue Op0 =Op.getOperand(0);
832unsigned ShAmt = *MaxSA;
833// Must already be signbits in DemandedBits bounds, and can't demand any
834// shifted in zeroes.
835if (DemandedBits.countl_zero() >= ShAmt) {
836unsigned NumSignBits =
837 DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1);
838if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
839return Op0;
840 }
841 }
842break;
843 }
844caseISD::SETCC: {
845SDValue Op0 =Op.getOperand(0);
846SDValue Op1 =Op.getOperand(1);
847ISD::CondCodeCC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
848// If (1) we only need the sign-bit, (2) the setcc operands are the same
849// width as the setcc result, and (3) the result of a setcc conforms to 0 or
850// -1, we may be able to bypass the setcc.
851if (DemandedBits.isSignMask() &&
852 Op0.getScalarValueSizeInBits() ==BitWidth &&
853getBooleanContents(Op0.getValueType()) ==
854BooleanContent::ZeroOrNegativeOneBooleanContent) {
855// If we're testing X < 0, then this compare isn't needed - just use X!
856// FIXME: We're limiting to integer types here, but this should also work
857// if we don't care about FP signed-zero. The use of SETLT with FP means
858// that we don't care about NaNs.
859if (CC ==ISD::SETLT && Op1.getValueType().isInteger() &&
860 (isNullConstant(Op1) ||ISD::isBuildVectorAllZeros(Op1.getNode())))
861return Op0;
862 }
863break;
864 }
865caseISD::SIGN_EXTEND_INREG: {
866// If none of the extended bits are demanded, eliminate the sextinreg.
867SDValue Op0 =Op.getOperand(0);
868EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
869unsigned ExBits = ExVT.getScalarSizeInBits();
870if (DemandedBits.getActiveBits() <= ExBits &&
871shouldRemoveRedundantExtend(Op))
872return Op0;
873// If the input is already sign extended, just drop the extension.
874unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1);
875if (NumSignBits >= (BitWidth - ExBits + 1))
876return Op0;
877break;
878 }
879caseISD::ANY_EXTEND_VECTOR_INREG:
880caseISD::SIGN_EXTEND_VECTOR_INREG:
881caseISD::ZERO_EXTEND_VECTOR_INREG: {
882if (VT.isScalableVector())
883returnSDValue();
884
885// If we only want the lowest element and none of extended bits, then we can
886// return the bitcasted source vector.
887SDValue Src =Op.getOperand(0);
888EVT SrcVT = Src.getValueType();
889EVT DstVT =Op.getValueType();
890if (IsLE && DemandedElts == 1 &&
891 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
892DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
893return DAG.getBitcast(DstVT, Src);
894 }
895break;
896 }
897caseISD::INSERT_VECTOR_ELT: {
898if (VT.isScalableVector())
899returnSDValue();
900
901// If we don't demand the inserted element, return the base vector.
902SDValue Vec =Op.getOperand(0);
903auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
904EVT VecVT = Vec.getValueType();
905if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
906 !DemandedElts[CIdx->getZExtValue()])
907return Vec;
908break;
909 }
910caseISD::INSERT_SUBVECTOR: {
911if (VT.isScalableVector())
912returnSDValue();
913
914SDValue Vec =Op.getOperand(0);
915SDValue Sub =Op.getOperand(1);
916uint64_tIdx =Op.getConstantOperandVal(2);
917unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
918APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts,Idx);
919// If we don't demand the inserted subvector, return the base vector.
920if (DemandedSubElts == 0)
921return Vec;
922break;
923 }
924caseISD::VECTOR_SHUFFLE: {
925assert(!VT.isScalableVector());
926ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
927
928// If all the demanded elts are from one operand and are inline,
929// then we can use the operand directly.
930bool AllUndef =true, IdentityLHS =true, IdentityRHS =true;
931for (unsigned i = 0; i != NumElts; ++i) {
932int M = ShuffleMask[i];
933if (M < 0 || !DemandedElts[i])
934continue;
935 AllUndef =false;
936 IdentityLHS &= (M == (int)i);
937 IdentityRHS &= ((M - NumElts) == i);
938 }
939
940if (AllUndef)
941return DAG.getUNDEF(Op.getValueType());
942if (IdentityLHS)
943returnOp.getOperand(0);
944if (IdentityRHS)
945returnOp.getOperand(1);
946break;
947 }
948default:
949// TODO: Probably okay to remove after audit; here to reduce change size
950// in initial enablement patch for scalable vectors
951if (VT.isScalableVector())
952returnSDValue();
953
954if (Op.getOpcode() >=ISD::BUILTIN_OP_END)
955if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
956Op,DemandedBits, DemandedElts, DAG,Depth))
957return V;
958break;
959 }
960returnSDValue();
961}
962
963SDValueTargetLowering::SimplifyMultipleUseDemandedBits(
964SDValueOp,constAPInt &DemandedBits,SelectionDAG &DAG,
965unsignedDepth) const{
966EVT VT =Op.getValueType();
967// Since the number of lanes in a scalable vector is unknown at compile time,
968// we track one bit which is implicitly broadcast to all lanes. This means
969// that all lanes in a scalable vector are considered demanded.
970APInt DemandedElts = VT.isFixedLengthVector()
971 ?APInt::getAllOnes(VT.getVectorNumElements())
972 :APInt(1, 1);
973return SimplifyMultipleUseDemandedBits(Op,DemandedBits, DemandedElts, DAG,
974Depth);
975}
976
977SDValueTargetLowering::SimplifyMultipleUseDemandedVectorElts(
978SDValueOp,constAPInt &DemandedElts,SelectionDAG &DAG,
979unsignedDepth) const{
980APIntDemandedBits =APInt::getAllOnes(Op.getScalarValueSizeInBits());
981return SimplifyMultipleUseDemandedBits(Op,DemandedBits, DemandedElts, DAG,
982Depth);
983}
984
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// Returns the replacement value on success, or an empty SDValue if the
// pattern does not match or the target cannot profitably support it.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  // Try to match one of the avgceil "+1" forms: Op2/Op3 are checked (in either
  // order) for being the constant 1, Op1 is the other operand of the inner
  // add, and A is the inner add node itself. On success this captures the two
  // averaged values into ExtOpA/ExtOpB and records the inner add in Add2 so
  // its overflow behavior can be queried later.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Check both commutations: the "+1" may hang off either operand of the
  // outer add. MatchOperands mutates ExtOpA/ExtOpB/Add2 as a side effect, so
  // the short-circuit order here matters.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  // - Needs >= 2 sign bit for both operands.
  // - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  // - Needs >= 1 zero bit for both operands.
  // - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits; // Redundant (sign or zero) high bits shared by A and B.
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // Subtract one: adding two values can consume one of the shared redundant
  // sign bits, so only min-1 sign bits survive the add.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between the unsigned (zero-extended) and signed (sign-extended)
  // average forms, preferring whichever proves more redundant high bits.
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // srl of a sign-extended value is only an average if the sign bit of the
    // result is not demanded (otherwise srl and sra would differ).
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  // Clamp to at least i8 — presumably the smallest integer type worth forming
  // an AVG node at; TODO confirm against target legalization expectations.
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Truncate the operands into the narrow type, form the AVG node, and
  // extend the result back to the original type (sign- or zero-extending to
  // match the chosen average flavor).
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1126
1127/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1128/// result of Op are ever used downstream. If we can use this information to
1129/// simplify Op, create a new simplified DAG node and return true, returning the
1130/// original and new nodes in Old and New. Otherwise, analyze the expression and
1131/// return a mask of Known bits for the expression (used to simplify the
1132/// caller). The Known bits may only be accurate for those bits in the
1133/// OriginalDemandedBits and OriginalDemandedElts.
1134boolTargetLowering::SimplifyDemandedBits(
1135SDValueOp,constAPInt &OriginalDemandedBits,
1136constAPInt &OriginalDemandedElts,KnownBits &Known,TargetLoweringOpt &TLO,
1137unsignedDepth,bool AssumeSingleUse) const{
1138unsignedBitWidth = OriginalDemandedBits.getBitWidth();
1139assert(Op.getScalarValueSizeInBits() ==BitWidth &&
1140"Mask size mismatches value type size!");
1141
1142// Don't know anything.
1143 Known =KnownBits(BitWidth);
1144
1145EVT VT =Op.getValueType();
1146bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1147unsigned NumElts = OriginalDemandedElts.getBitWidth();
1148assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1149"Unexpected vector size");
1150
1151APIntDemandedBits = OriginalDemandedBits;
1152APInt DemandedElts = OriginalDemandedElts;
1153SDLoc dl(Op);
1154
1155// Undef operand.
1156if (Op.isUndef())
1157returnfalse;
1158
1159// We can't simplify target constants.
1160if (Op.getOpcode() ==ISD::TargetConstant)
1161returnfalse;
1162
1163if (Op.getOpcode() ==ISD::Constant) {
1164// We know all of the bits for a constant!
1165 Known =KnownBits::makeConstant(Op->getAsAPIntVal());
1166returnfalse;
1167 }
1168
1169if (Op.getOpcode() ==ISD::ConstantFP) {
1170// We know all of the bits for a floating point constant!
1171 Known =KnownBits::makeConstant(
1172 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1173returnfalse;
1174 }
1175
1176// Other users may use these bits.
1177bool HasMultiUse =false;
1178if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1179if (Depth >=SelectionDAG::MaxRecursionDepth) {
1180// Limit search depth.
1181returnfalse;
1182 }
1183// Allow multiple uses, just set the DemandedBits/Elts to all bits.
1184DemandedBits =APInt::getAllOnes(BitWidth);
1185 DemandedElts =APInt::getAllOnes(NumElts);
1186 HasMultiUse =true;
1187 }elseif (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1188// Not demanding any bits/elts from Op.
1189return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1190 }elseif (Depth >=SelectionDAG::MaxRecursionDepth) {
1191// Limit search depth.
1192returnfalse;
1193 }
1194
1195KnownBits Known2;
1196switch (Op.getOpcode()) {
1197caseISD::SCALAR_TO_VECTOR: {
1198if (VT.isScalableVector())
1199returnfalse;
1200if (!DemandedElts[0])
1201return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1202
1203KnownBits SrcKnown;
1204SDValue Src =Op.getOperand(0);
1205unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1206APInt SrcDemandedBits =DemandedBits.zext(SrcBitWidth);
1207if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO,Depth + 1))
1208returntrue;
1209
1210// Upper elements are undef, so only get the knownbits if we just demand
1211// the bottom element.
1212if (DemandedElts == 1)
1213 Known = SrcKnown.anyextOrTrunc(BitWidth);
1214break;
1215 }
1216caseISD::BUILD_VECTOR:
1217// Collect the known bits that are shared by every demanded element.
1218// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1219 Known = TLO.DAG.computeKnownBits(Op, DemandedElts,Depth);
1220returnfalse;// Don't fall through, will infinitely loop.
1221caseISD::SPLAT_VECTOR: {
1222SDValue Scl =Op.getOperand(0);
1223APInt DemandedSclBits =DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1224KnownBits KnownScl;
1225if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO,Depth + 1))
1226returntrue;
1227
1228// Implicitly truncate the bits to match the official semantics of
1229// SPLAT_VECTOR.
1230 Known = KnownScl.trunc(BitWidth);
1231break;
1232 }
1233caseISD::LOAD: {
1234auto *LD = cast<LoadSDNode>(Op);
1235if (getTargetConstantFromLoad(LD)) {
1236 Known = TLO.DAG.computeKnownBits(Op, DemandedElts,Depth);
1237returnfalse;// Don't fall through, will infinitely loop.
1238 }
1239if (ISD::isZEXTLoad(Op.getNode()) &&Op.getResNo() == 0) {
1240// If this is a ZEXTLoad and we are looking at the loaded value.
1241EVT MemVT = LD->getMemoryVT();
1242unsigned MemBits = MemVT.getScalarSizeInBits();
1243 Known.Zero.setBitsFrom(MemBits);
1244returnfalse;// Don't fall through, will infinitely loop.
1245 }
1246break;
1247 }
1248caseISD::INSERT_VECTOR_ELT: {
1249if (VT.isScalableVector())
1250returnfalse;
1251SDValue Vec =Op.getOperand(0);
1252SDValue Scl =Op.getOperand(1);
1253auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1254EVT VecVT = Vec.getValueType();
1255
1256// If index isn't constant, assume we need all vector elements AND the
1257// inserted element.
1258APInt DemandedVecElts(DemandedElts);
1259if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1260unsignedIdx = CIdx->getZExtValue();
1261 DemandedVecElts.clearBit(Idx);
1262
1263// Inserted element is not required.
1264if (!DemandedElts[Idx])
1265return TLO.CombineTo(Op, Vec);
1266 }
1267
1268KnownBits KnownScl;
1269unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1270APInt DemandedSclBits =DemandedBits.zextOrTrunc(NumSclBits);
1271if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO,Depth + 1))
1272returntrue;
1273
1274 Known = KnownScl.anyextOrTrunc(BitWidth);
1275
1276KnownBits KnownVec;
1277if (SimplifyDemandedBits(Vec,DemandedBits, DemandedVecElts, KnownVec, TLO,
1278Depth + 1))
1279returntrue;
1280
1281if (!!DemandedVecElts)
1282 Known = Known.intersectWith(KnownVec);
1283
1284returnfalse;
1285 }
1286caseISD::INSERT_SUBVECTOR: {
1287if (VT.isScalableVector())
1288returnfalse;
1289// Demand any elements from the subvector and the remainder from the src its
1290// inserted into.
1291SDValue Src =Op.getOperand(0);
1292SDValue Sub =Op.getOperand(1);
1293uint64_tIdx =Op.getConstantOperandVal(2);
1294unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1295APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts,Idx);
1296APInt DemandedSrcElts = DemandedElts;
1297 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts),Idx);
1298
1299KnownBits KnownSub, KnownSrc;
1300if (SimplifyDemandedBits(Sub,DemandedBits, DemandedSubElts, KnownSub, TLO,
1301Depth + 1))
1302returntrue;
1303if (SimplifyDemandedBits(Src,DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1304Depth + 1))
1305returntrue;
1306
1307 Known.Zero.setAllBits();
1308 Known.One.setAllBits();
1309if (!!DemandedSubElts)
1310 Known = Known.intersectWith(KnownSub);
1311if (!!DemandedSrcElts)
1312 Known = Known.intersectWith(KnownSrc);
1313
1314// Attempt to avoid multi-use src if we don't need anything from it.
1315if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1316 !DemandedSrcElts.isAllOnes()) {
1317SDValue NewSub = SimplifyMultipleUseDemandedBits(
1318 Sub,DemandedBits, DemandedSubElts, TLO.DAG,Depth + 1);
1319SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1320 Src,DemandedBits, DemandedSrcElts, TLO.DAG,Depth + 1);
1321if (NewSub || NewSrc) {
1322 NewSub = NewSub ? NewSub : Sub;
1323 NewSrc = NewSrc ? NewSrc : Src;
1324SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1325Op.getOperand(2));
1326return TLO.CombineTo(Op, NewOp);
1327 }
1328 }
1329break;
1330 }
1331caseISD::EXTRACT_SUBVECTOR: {
1332if (VT.isScalableVector())
1333returnfalse;
1334// Offset the demanded elts by the subvector index.
1335SDValue Src =Op.getOperand(0);
1336if (Src.getValueType().isScalableVector())
1337break;
1338uint64_tIdx =Op.getConstantOperandVal(1);
1339unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1340APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1341
1342if (SimplifyDemandedBits(Src,DemandedBits, DemandedSrcElts, Known, TLO,
1343Depth + 1))
1344returntrue;
1345
1346// Attempt to avoid multi-use src if we don't need anything from it.
1347if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1348SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1349 Src,DemandedBits, DemandedSrcElts, TLO.DAG,Depth + 1);
1350if (DemandedSrc) {
1351SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1352Op.getOperand(1));
1353return TLO.CombineTo(Op, NewOp);
1354 }
1355 }
1356break;
1357 }
1358caseISD::CONCAT_VECTORS: {
1359if (VT.isScalableVector())
1360returnfalse;
1361 Known.Zero.setAllBits();
1362 Known.One.setAllBits();
1363EVT SubVT =Op.getOperand(0).getValueType();
1364unsigned NumSubVecs =Op.getNumOperands();
1365unsigned NumSubElts = SubVT.getVectorNumElements();
1366for (unsigned i = 0; i != NumSubVecs; ++i) {
1367APInt DemandedSubElts =
1368 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1369if (SimplifyDemandedBits(Op.getOperand(i),DemandedBits, DemandedSubElts,
1370 Known2, TLO,Depth + 1))
1371returntrue;
1372// Known bits are shared by every demanded subvector element.
1373if (!!DemandedSubElts)
1374 Known = Known.intersectWith(Known2);
1375 }
1376break;
1377 }
1378caseISD::VECTOR_SHUFFLE: {
1379assert(!VT.isScalableVector());
1380ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1381
1382// Collect demanded elements from shuffle operands..
1383APInt DemandedLHS, DemandedRHS;
1384if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1385 DemandedRHS))
1386break;
1387
1388if (!!DemandedLHS || !!DemandedRHS) {
1389SDValue Op0 =Op.getOperand(0);
1390SDValue Op1 =Op.getOperand(1);
1391
1392 Known.Zero.setAllBits();
1393 Known.One.setAllBits();
1394if (!!DemandedLHS) {
1395if (SimplifyDemandedBits(Op0,DemandedBits, DemandedLHS, Known2, TLO,
1396Depth + 1))
1397returntrue;
1398 Known = Known.intersectWith(Known2);
1399 }
1400if (!!DemandedRHS) {
1401if (SimplifyDemandedBits(Op1,DemandedBits, DemandedRHS, Known2, TLO,
1402Depth + 1))
1403returntrue;
1404 Known = Known.intersectWith(Known2);
1405 }
1406
1407// Attempt to avoid multi-use ops if we don't need anything from them.
1408SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1409 Op0,DemandedBits, DemandedLHS, TLO.DAG,Depth + 1);
1410SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1411 Op1,DemandedBits, DemandedRHS, TLO.DAG,Depth + 1);
1412if (DemandedOp0 || DemandedOp1) {
1413 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1414 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1415SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1416return TLO.CombineTo(Op, NewOp);
1417 }
1418 }
1419break;
1420 }
1421caseISD::AND: {
1422SDValue Op0 =Op.getOperand(0);
1423SDValue Op1 =Op.getOperand(1);
1424
1425// If the RHS is a constant, check to see if the LHS would be zero without
1426// using the bits from the RHS. Below, we use knowledge about the RHS to
1427// simplify the LHS, here we're using information from the LHS to simplify
1428// the RHS.
1429if (ConstantSDNode *RHSC =isConstOrConstSplat(Op1, DemandedElts)) {
1430// Do not increment Depth here; that can cause an infinite loop.
1431KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts,Depth);
1432// If the LHS already has zeros where RHSC does, this 'and' is dead.
1433if ((LHSKnown.Zero &DemandedBits) ==
1434 (~RHSC->getAPIntValue() &DemandedBits))
1435return TLO.CombineTo(Op, Op0);
1436
1437// If any of the set bits in the RHS are known zero on the LHS, shrink
1438// the constant.
1439if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero &DemandedBits,
1440 DemandedElts, TLO))
1441returntrue;
1442
1443// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1444// constant, but if this 'and' is only clearing bits that were just set by
1445// the xor, then this 'and' can be eliminated by shrinking the mask of
1446// the xor. For example, for a 32-bit X:
1447// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1448if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1449 LHSKnown.One == ~RHSC->getAPIntValue()) {
1450SDValueXor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1451return TLO.CombineTo(Op,Xor);
1452 }
1453 }
1454
1455// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1456// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1457if (Op0.getOpcode() ==ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1458 (Op0.getOperand(0).isUndef() ||
1459ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1460 Op0->hasOneUse()) {
1461unsigned NumSubElts =
1462 Op0.getOperand(1).getValueType().getVectorNumElements();
1463unsigned SubIdx = Op0.getConstantOperandVal(2);
1464APInt DemandedSub =
1465APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1466KnownBits KnownSubMask =
1467 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts,Depth + 1);
1468if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1469SDValue NewAnd =
1470 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1471SDValue NewInsert =
1472 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1473 Op0.getOperand(1), Op0.getOperand(2));
1474return TLO.CombineTo(Op, NewInsert);
1475 }
1476 }
1477
1478if (SimplifyDemandedBits(Op1,DemandedBits, DemandedElts, Known, TLO,
1479Depth + 1))
1480returntrue;
1481if (SimplifyDemandedBits(Op0, ~Known.Zero &DemandedBits, DemandedElts,
1482 Known2, TLO,Depth + 1))
1483returntrue;
1484
1485// If all of the demanded bits are known one on one side, return the other.
1486// These bits cannot contribute to the result of the 'and'.
1487if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1488return TLO.CombineTo(Op, Op0);
1489if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1490return TLO.CombineTo(Op, Op1);
1491// If all of the demanded bits in the inputs are known zeros, return zero.
1492if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1493return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1494// If the RHS is a constant, see if we can simplify it.
1495if (ShrinkDemandedConstant(Op, ~Known2.Zero &DemandedBits, DemandedElts,
1496 TLO))
1497returntrue;
1498// If the operation can be done in a smaller type, do so.
1499if (ShrinkDemandedOp(Op,BitWidth,DemandedBits, TLO))
1500returntrue;
1501
1502// Attempt to avoid multi-use ops if we don't need anything from them.
1503if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505 Op0,DemandedBits, DemandedElts, TLO.DAG,Depth + 1);
1506SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507 Op1,DemandedBits, DemandedElts, TLO.DAG,Depth + 1);
1508if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1512return TLO.CombineTo(Op, NewOp);
1513 }
1514 }
1515
1516 Known &= Known2;
1517break;
1518 }
1519caseISD::OR: {
1520SDValue Op0 =Op.getOperand(0);
1521SDValue Op1 =Op.getOperand(1);
1522if (SimplifyDemandedBits(Op1,DemandedBits, DemandedElts, Known, TLO,
1523Depth + 1)) {
1524Op->dropFlags(SDNodeFlags::Disjoint);
1525returntrue;
1526 }
1527
1528if (SimplifyDemandedBits(Op0, ~Known.One &DemandedBits, DemandedElts,
1529 Known2, TLO,Depth + 1)) {
1530Op->dropFlags(SDNodeFlags::Disjoint);
1531returntrue;
1532 }
1533
1534// If all of the demanded bits are known zero on one side, return the other.
1535// These bits cannot contribute to the result of the 'or'.
1536if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1537return TLO.CombineTo(Op, Op0);
1538if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1539return TLO.CombineTo(Op, Op1);
1540// If the RHS is a constant, see if we can simplify it.
1541if (ShrinkDemandedConstant(Op,DemandedBits, DemandedElts, TLO))
1542returntrue;
1543// If the operation can be done in a smaller type, do so.
1544if (ShrinkDemandedOp(Op,BitWidth,DemandedBits, TLO))
1545returntrue;
1546
1547// Attempt to avoid multi-use ops if we don't need anything from them.
1548if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1549SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1550 Op0,DemandedBits, DemandedElts, TLO.DAG,Depth + 1);
1551SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1552 Op1,DemandedBits, DemandedElts, TLO.DAG,Depth + 1);
1553if (DemandedOp0 || DemandedOp1) {
1554 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1555 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1556SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1557return TLO.CombineTo(Op, NewOp);
1558 }
1559 }
1560
1561// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1562// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1563if (Op0.getOpcode() ==ISD::AND && Op1.getOpcode() ==ISD::AND &&
1564 Op0->hasOneUse() && Op1->hasOneUse()) {
1565// Attempt to match all commutations - m_c_Or would've been useful!
1566for (intI = 0;I != 2; ++I) {
1567SDValueX =Op.getOperand(I).getOperand(0);
1568SDValue C1 =Op.getOperand(I).getOperand(1);
1569SDValue Alt =Op.getOperand(1 -I).getOperand(0);
1570SDValue C2 =Op.getOperand(1 -I).getOperand(1);
1571if (Alt.getOpcode() ==ISD::OR) {
1572for (int J = 0; J != 2; ++J) {
1573if (X == Alt.getOperand(J)) {
1574SDValueY = Alt.getOperand(1 - J);
1575if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1576 {C1, C2})) {
1577SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT,X, C12);
1578SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT,Y, C2);
1579return TLO.CombineTo(
1580Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1581 }
1582 }
1583 }
1584 }
1585 }
1586 }
1587
1588 Known |= Known2;
1589break;
1590 }
1591caseISD::XOR: {
1592SDValue Op0 =Op.getOperand(0);
1593SDValue Op1 =Op.getOperand(1);
1594
1595if (SimplifyDemandedBits(Op1,DemandedBits, DemandedElts, Known, TLO,
1596Depth + 1))
1597returntrue;
1598if (SimplifyDemandedBits(Op0,DemandedBits, DemandedElts, Known2, TLO,
1599Depth + 1))
1600returntrue;
1601
1602// If all of the demanded bits are known zero on one side, return the other.
1603// These bits cannot contribute to the result of the 'xor'.
1604if (DemandedBits.isSubsetOf(Known.Zero))
1605return TLO.CombineTo(Op, Op0);
1606if (DemandedBits.isSubsetOf(Known2.Zero))
1607return TLO.CombineTo(Op, Op1);
1608// If the operation can be done in a smaller type, do so.
1609if (ShrinkDemandedOp(Op,BitWidth,DemandedBits, TLO))
1610returntrue;
1611
1612// If all of the unknown bits are known to be zero on one side or the other
1613// turn this into an *inclusive* or.
1614// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1615if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1616return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1617
1618ConstantSDNode *C =isConstOrConstSplat(Op1, DemandedElts);
1619if (C) {
1620// If one side is a constant, and all of the set bits in the constant are
1621// also known set on the other side, turn this into an AND, as we know
1622// the bits will be cleared.
1623// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1624// NB: it is okay if more bits are known than are requested
1625if (C->getAPIntValue() == Known2.One) {
1626SDValue ANDC =
1627 TLO.DAG.getConstant(~C->getAPIntValue() &DemandedBits, dl, VT);
1628return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1629 }
1630
1631// If the RHS is a constant, see if we can change it. Don't alter a -1
1632// constant because that's a 'not' op, and that is better for combining
1633// and codegen.
1634if (!C->isAllOnes() &&DemandedBits.isSubsetOf(C->getAPIntValue())) {
1635// We're flipping all demanded bits. Flip the undemanded bits too.
1636SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1637return TLO.CombineTo(Op, New);
1638 }
1639
1640unsigned Op0Opcode = Op0.getOpcode();
1641if ((Op0Opcode ==ISD::SRL || Op0Opcode ==ISD::SHL) && Op0.hasOneUse()) {
1642if (ConstantSDNode *ShiftC =
1643isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1644// Don't crash on an oversized shift. We can not guarantee that a
1645// bogus shift has been simplified to undef.
1646if (ShiftC->getAPIntValue().ult(BitWidth)) {
1647uint64_t ShiftAmt = ShiftC->getZExtValue();
1648APInt Ones =APInt::getAllOnes(BitWidth);
1649 Ones = Op0Opcode ==ISD::SHL ? Ones.shl(ShiftAmt)
1650 : Ones.lshr(ShiftAmt);
1651if ((DemandedBits &C->getAPIntValue()) == (DemandedBits & Ones) &&
1652 isDesirableToCommuteXorWithShift(Op.getNode())) {
1653// If the xor constant is a demanded mask, do a 'not' before the
1654// shift:
1655// xor (X << ShiftC), XorC --> (not X) << ShiftC
1656// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1657SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1658return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1659 Op0.getOperand(1)));
1660 }
1661 }
1662 }
1663 }
1664 }
1665
1666// If we can't turn this into a 'not', try to shrink the constant.
1667if (!C || !C->isAllOnes())
1668if (ShrinkDemandedConstant(Op,DemandedBits, DemandedElts, TLO))
1669returntrue;
1670
1671// Attempt to avoid multi-use ops if we don't need anything from them.
1672if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1673SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1674 Op0,DemandedBits, DemandedElts, TLO.DAG,Depth + 1);
1675SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1676 Op1,DemandedBits, DemandedElts, TLO.DAG,Depth + 1);
1677if (DemandedOp0 || DemandedOp1) {
1678 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1679 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1680SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1681return TLO.CombineTo(Op, NewOp);
1682 }
1683 }
1684
1685 Known ^= Known2;
1686break;
1687 }
1688caseISD::SELECT:
1689if (SimplifyDemandedBits(Op.getOperand(2),DemandedBits, DemandedElts,
1690 Known, TLO,Depth + 1))
1691returntrue;
1692if (SimplifyDemandedBits(Op.getOperand(1),DemandedBits, DemandedElts,
1693 Known2, TLO,Depth + 1))
1694returntrue;
1695
1696// If the operands are constants, see if we can simplify them.
1697if (ShrinkDemandedConstant(Op,DemandedBits, DemandedElts, TLO))
1698returntrue;
1699
1700// Only known if known in both the LHS and RHS.
1701 Known = Known.intersectWith(Known2);
1702break;
1703caseISD::VSELECT:
1704if (SimplifyDemandedBits(Op.getOperand(2),DemandedBits, DemandedElts,
1705 Known, TLO,Depth + 1))
1706returntrue;
1707if (SimplifyDemandedBits(Op.getOperand(1),DemandedBits, DemandedElts,
1708 Known2, TLO,Depth + 1))
1709returntrue;
1710
1711// Only known if known in both the LHS and RHS.
1712 Known = Known.intersectWith(Known2);
1713break;
1714caseISD::SELECT_CC:
1715if (SimplifyDemandedBits(Op.getOperand(3),DemandedBits, DemandedElts,
1716 Known, TLO,Depth + 1))
1717returntrue;
1718if (SimplifyDemandedBits(Op.getOperand(2),DemandedBits, DemandedElts,
1719 Known2, TLO,Depth + 1))
1720returntrue;
1721
1722// If the operands are constants, see if we can simplify them.
1723if (ShrinkDemandedConstant(Op,DemandedBits, DemandedElts, TLO))
1724returntrue;
1725
1726// Only known if known in both the LHS and RHS.
1727 Known = Known.intersectWith(Known2);
1728break;
1729caseISD::SETCC: {
1730SDValue Op0 =Op.getOperand(0);
1731SDValue Op1 =Op.getOperand(1);
1732ISD::CondCodeCC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1733// If (1) we only need the sign-bit, (2) the setcc operands are the same
1734// width as the setcc result, and (3) the result of a setcc conforms to 0 or
1735// -1, we may be able to bypass the setcc.
1736if (DemandedBits.isSignMask() &&
1737 Op0.getScalarValueSizeInBits() ==BitWidth &&
1738getBooleanContents(Op0.getValueType()) ==
1739BooleanContent::ZeroOrNegativeOneBooleanContent) {
1740// If we're testing X < 0, then this compare isn't needed - just use X!
1741// FIXME: We're limiting to integer types here, but this should also work
1742// if we don't care about FP signed-zero. The use of SETLT with FP means
1743// that we don't care about NaNs.
1744if (CC ==ISD::SETLT && Op1.getValueType().isInteger() &&
1745 (isNullConstant(Op1) ||ISD::isBuildVectorAllZeros(Op1.getNode())))
1746return TLO.CombineTo(Op, Op0);
1747
1748// TODO: Should we check for other forms of sign-bit comparisons?
1749// Examples: X <= -1, X >= 0
1750 }
1751if (getBooleanContents(Op0.getValueType()) ==
1752TargetLowering::ZeroOrOneBooleanContent &&
1753BitWidth > 1)
1754 Known.Zero.setBitsFrom(1);
1755break;
1756 }
1757caseISD::SHL: {
1758SDValue Op0 =Op.getOperand(0);
1759SDValue Op1 =Op.getOperand(1);
1760EVT ShiftVT = Op1.getValueType();
1761
1762if (std::optional<uint64_t> KnownSA =
1763 TLO.DAG.getValidShiftAmount(Op, DemandedElts,Depth + 1)) {
1764unsigned ShAmt = *KnownSA;
1765if (ShAmt == 0)
1766return TLO.CombineTo(Op, Op0);
1767
1768// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1769// single shift. We can do this if the bottom bits (which are shifted
1770// out) are never demanded.
1771// TODO - support non-uniform vector amounts.
1772if (Op0.getOpcode() ==ISD::SRL) {
1773if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1774if (std::optional<uint64_t> InnerSA =
1775 TLO.DAG.getValidShiftAmount(Op0, DemandedElts,Depth + 2)) {
1776unsigned C1 = *InnerSA;
1777unsigned Opc =ISD::SHL;
1778int Diff = ShAmt - C1;
1779if (Diff < 0) {
1780 Diff = -Diff;
1781 Opc =ISD::SRL;
1782 }
1783SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1784return TLO.CombineTo(
1785Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1786 }
1787 }
1788 }
1789
1790// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1791// are not demanded. This will likely allow the anyext to be folded away.
1792// TODO - support non-uniform vector amounts.
1793if (Op0.getOpcode() ==ISD::ANY_EXTEND) {
1794SDValue InnerOp = Op0.getOperand(0);
1795EVT InnerVT = InnerOp.getValueType();
1796unsigned InnerBits = InnerVT.getScalarSizeInBits();
1797if (ShAmt < InnerBits &&DemandedBits.getActiveBits() <= InnerBits &&
1798 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1799SDValue NarrowShl = TLO.DAG.getNode(
1800ISD::SHL, dl, InnerVT, InnerOp,
1801 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1802return TLO.CombineTo(
1803Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1804 }
1805
1806// Repeat the SHL optimization above in cases where an extension
1807// intervenes: (shl (anyext (shr x, c1)), c2) to
1808// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1809// aren't demanded (as above) and that the shifted upper c1 bits of
1810// x aren't demanded.
1811// TODO - support non-uniform vector amounts.
1812if (InnerOp.getOpcode() ==ISD::SRL && Op0.hasOneUse() &&
1813 InnerOp.hasOneUse()) {
1814if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1815 InnerOp, DemandedElts,Depth + 2)) {
1816unsigned InnerShAmt = *SA2;
1817if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1818DemandedBits.getActiveBits() <=
1819 (InnerBits - InnerShAmt + ShAmt) &&
1820DemandedBits.countr_zero() >= ShAmt) {
1821SDValue NewSA =
1822 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1823SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1824 InnerOp.getOperand(0));
1825return TLO.CombineTo(
1826Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1827 }
1828 }
1829 }
1830 }
1831
1832APInt InDemandedMask =DemandedBits.lshr(ShAmt);
1833if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1834Depth + 1)) {
1835// Disable the nsw and nuw flags. We can no longer guarantee that we
1836// won't wrap after simplification.
1837Op->dropFlags(SDNodeFlags::NoWrap);
1838returntrue;
1839 }
1840 Known.Zero <<= ShAmt;
1841 Known.One <<= ShAmt;
1842// low bits known zero.
1843 Known.Zero.setLowBits(ShAmt);
1844
1845// Attempt to avoid multi-use ops if we don't need anything from them.
1846if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1847SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1848 Op0, InDemandedMask, DemandedElts, TLO.DAG,Depth + 1);
1849if (DemandedOp0) {
1850SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1851return TLO.CombineTo(Op, NewOp);
1852 }
1853 }
1854
1855// TODO: Can we merge this fold with the one below?
1856// Try shrinking the operation as long as the shift amount will still be
1857// in range.
1858if (ShAmt <DemandedBits.getActiveBits() && !VT.isVector() &&
1859Op.getNode()->hasOneUse()) {
1860// Search for the smallest integer type with free casts to and from
1861// Op's type. For expedience, just check power-of-2 integer types.
1862unsigned DemandedSize =DemandedBits.getActiveBits();
1863for (unsigned SmallVTBits =llvm::bit_ceil(DemandedSize);
1864 SmallVTBits <BitWidth; SmallVTBits =NextPowerOf2(SmallVTBits)) {
1865EVT SmallVT =EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1866if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1867 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1868isTruncateFree(VT, SmallVT) &&isZExtFree(SmallVT, VT) &&
1869 (!TLO.LegalOperations() ||isOperationLegal(ISD::SHL, SmallVT))) {
1870assert(DemandedSize <= SmallVTBits &&
1871"Narrowed below demanded bits?");
1872// We found a type with free casts.
1873SDValue NarrowShl = TLO.DAG.getNode(
1874ISD::SHL, dl, SmallVT,
1875 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT,Op.getOperand(0)),
1876 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1877return TLO.CombineTo(
1878Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1879 }
1880 }
1881 }
1882
1883// Narrow shift to lower half - similar to ShrinkDemandedOp.
1884// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1885// Only do this if we demand the upper half so the knownbits are correct.
1886unsigned HalfWidth =BitWidth / 2;
1887if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1888DemandedBits.countLeadingOnes() >= HalfWidth) {
1889EVT HalfVT =EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1890if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1891 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1892isTruncateFree(VT, HalfVT) &&isZExtFree(HalfVT, VT) &&
1893 (!TLO.LegalOperations() ||isOperationLegal(ISD::SHL, HalfVT))) {
1894// If we're demanding the upper bits at all, we must ensure
1895// that the upper bits of the shift result are known to be zero,
1896// which is equivalent to the narrow shift being NUW.
1897if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1898bool IsNSW = Known.countMinSignBits() > HalfWidth;
1899SDNodeFlags Flags;
1900 Flags.setNoSignedWrap(IsNSW);
1901 Flags.setNoUnsignedWrap(IsNUW);
1902SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1903SDValue NewShiftAmt =
1904 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1905SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1906 NewShiftAmt, Flags);
1907SDValue NewExt =
1908 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1909return TLO.CombineTo(Op, NewExt);
1910 }
1911 }
1912 }
1913 }else {
1914// This is a variable shift, so we can't shift the demand mask by a known
1915// amount. But if we are not demanding high bits, then we are not
1916// demanding those bits from the pre-shifted operand either.
1917if (unsigned CTLZ =DemandedBits.countl_zero()) {
1918APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth,BitWidth - CTLZ));
1919if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1920Depth + 1)) {
1921// Disable the nsw and nuw flags. We can no longer guarantee that we
1922// won't wrap after simplification.
1923Op->dropFlags(SDNodeFlags::NoWrap);
1924returntrue;
1925 }
1926 Known.resetAll();
1927 }
1928 }
1929
1930// If we are only demanding sign bits then we can use the shift source
1931// directly.
1932if (std::optional<uint64_t> MaxSA =
1933 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts,Depth + 1)) {
1934unsigned ShAmt = *MaxSA;
1935unsigned NumSignBits =
1936 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1);
1937unsigned UpperDemandedBits =BitWidth -DemandedBits.countr_zero();
1938if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1939return TLO.CombineTo(Op, Op0);
1940 }
1941break;
1942 }
1943caseISD::SRL: {
1944SDValue Op0 =Op.getOperand(0);
1945SDValue Op1 =Op.getOperand(1);
1946EVT ShiftVT = Op1.getValueType();
1947
1948if (std::optional<uint64_t> KnownSA =
1949 TLO.DAG.getValidShiftAmount(Op, DemandedElts,Depth + 1)) {
1950unsigned ShAmt = *KnownSA;
1951if (ShAmt == 0)
1952return TLO.CombineTo(Op, Op0);
1953
1954// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1955// single shift. We can do this if the top bits (which are shifted out)
1956// are never demanded.
1957// TODO - support non-uniform vector amounts.
1958if (Op0.getOpcode() ==ISD::SHL) {
1959if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1960if (std::optional<uint64_t> InnerSA =
1961 TLO.DAG.getValidShiftAmount(Op0, DemandedElts,Depth + 2)) {
1962unsigned C1 = *InnerSA;
1963unsigned Opc =ISD::SRL;
1964int Diff = ShAmt - C1;
1965if (Diff < 0) {
1966 Diff = -Diff;
1967 Opc =ISD::SHL;
1968 }
1969SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1970return TLO.CombineTo(
1971Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1972 }
1973 }
1974 }
1975
1976// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1977// single sra. We can do this if the top bits are never demanded.
1978if (Op0.getOpcode() ==ISD::SRA && Op0.hasOneUse()) {
1979if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1980if (std::optional<uint64_t> InnerSA =
1981 TLO.DAG.getValidShiftAmount(Op0, DemandedElts,Depth + 2)) {
1982unsigned C1 = *InnerSA;
1983// Clamp the combined shift amount if it exceeds the bit width.
1984unsigned Combined = std::min(C1 + ShAmt,BitWidth - 1);
1985SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1986return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1987 Op0.getOperand(0), NewSA));
1988 }
1989 }
1990 }
1991
1992APInt InDemandedMask = (DemandedBits << ShAmt);
1993
1994// If the shift is exact, then it does demand the low bits (and knows that
1995// they are zero).
1996if (Op->getFlags().hasExact())
1997 InDemandedMask.setLowBits(ShAmt);
1998
1999// Narrow shift to lower half - similar to ShrinkDemandedOp.
2000// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2001if ((BitWidth % 2) == 0 && !VT.isVector()) {
2002APInt HiBits =APInt::getHighBitsSet(BitWidth,BitWidth / 2);
2003EVT HalfVT =EVT::getIntegerVT(*TLO.DAG.getContext(),BitWidth / 2);
2004if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2005 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2006isTruncateFree(VT, HalfVT) &&isZExtFree(HalfVT, VT) &&
2007 (!TLO.LegalOperations() ||isOperationLegal(ISD::SRL, HalfVT)) &&
2008 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2009 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2010SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2011SDValue NewShiftAmt =
2012 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2013SDValue NewShift =
2014 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2015return TLO.CombineTo(
2016Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2017 }
2018 }
2019
2020// Compute the new bits that are at the top now.
2021if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2022Depth + 1))
2023returntrue;
2024 Known.Zero.lshrInPlace(ShAmt);
2025 Known.One.lshrInPlace(ShAmt);
2026// High bits known zero.
2027 Known.Zero.setHighBits(ShAmt);
2028
2029// Attempt to avoid multi-use ops if we don't need anything from them.
2030if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2031SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2032 Op0, InDemandedMask, DemandedElts, TLO.DAG,Depth + 1);
2033if (DemandedOp0) {
2034SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2035return TLO.CombineTo(Op, NewOp);
2036 }
2037 }
2038 }else {
2039// Use generic knownbits computation as it has support for non-uniform
2040// shift amounts.
2041 Known = TLO.DAG.computeKnownBits(Op, DemandedElts,Depth);
2042 }
2043
2044// If we are only demanding sign bits then we can use the shift source
2045// directly.
2046if (std::optional<uint64_t> MaxSA =
2047 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts,Depth + 1)) {
2048unsigned ShAmt = *MaxSA;
2049// Must already be signbits in DemandedBits bounds, and can't demand any
2050// shifted in zeroes.
2051if (DemandedBits.countl_zero() >= ShAmt) {
2052unsigned NumSignBits =
2053 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1);
2054if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2055return TLO.CombineTo(Op, Op0);
2056 }
2057 }
2058
2059// Try to match AVG patterns (after shift simplification).
2060if (SDValue AVG =combineShiftToAVG(Op, TLO, *this,DemandedBits,
2061 DemandedElts,Depth + 1))
2062return TLO.CombineTo(Op, AVG);
2063
2064break;
2065 }
2066caseISD::SRA: {
2067SDValue Op0 =Op.getOperand(0);
2068SDValue Op1 =Op.getOperand(1);
2069EVT ShiftVT = Op1.getValueType();
2070
2071// If we only want bits that already match the signbit then we don't need
2072// to shift.
2073unsigned NumHiDemandedBits =BitWidth -DemandedBits.countr_zero();
2074if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1) >=
2075 NumHiDemandedBits)
2076return TLO.CombineTo(Op, Op0);
2077
2078// If this is an arithmetic shift right and only the low-bit is set, we can
2079// always convert this into a logical shr, even if the shift amount is
2080// variable. The low bit of the shift cannot be an input sign bit unless
2081// the shift amount is >= the size of the datatype, which is undefined.
2082if (DemandedBits.isOne())
2083return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2084
2085if (std::optional<uint64_t> KnownSA =
2086 TLO.DAG.getValidShiftAmount(Op, DemandedElts,Depth + 1)) {
2087unsigned ShAmt = *KnownSA;
2088if (ShAmt == 0)
2089return TLO.CombineTo(Op, Op0);
2090
2091// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2092// supports sext_inreg.
2093if (Op0.getOpcode() ==ISD::SHL) {
2094if (std::optional<uint64_t> InnerSA =
2095 TLO.DAG.getValidShiftAmount(Op0, DemandedElts,Depth + 2)) {
2096unsigned LowBits =BitWidth - ShAmt;
2097EVT ExtVT =EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2098if (VT.isVector())
2099 ExtVT =EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2100 VT.getVectorElementCount());
2101
2102if (*InnerSA == ShAmt) {
2103if (!TLO.LegalOperations() ||
2104getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==Legal)
2105return TLO.CombineTo(
2106Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2107 Op0.getOperand(0),
2108 TLO.DAG.getValueType(ExtVT)));
2109
2110// Even if we can't convert to sext_inreg, we might be able to
2111// remove this shift pair if the input is already sign extended.
2112unsigned NumSignBits =
2113 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2114if (NumSignBits > ShAmt)
2115return TLO.CombineTo(Op, Op0.getOperand(0));
2116 }
2117 }
2118 }
2119
2120APInt InDemandedMask = (DemandedBits << ShAmt);
2121
2122// If the shift is exact, then it does demand the low bits (and knows that
2123// they are zero).
2124if (Op->getFlags().hasExact())
2125 InDemandedMask.setLowBits(ShAmt);
2126
2127// If any of the demanded bits are produced by the sign extension, we also
2128// demand the input sign bit.
2129if (DemandedBits.countl_zero() < ShAmt)
2130 InDemandedMask.setSignBit();
2131
2132if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2133Depth + 1))
2134returntrue;
2135 Known.Zero.lshrInPlace(ShAmt);
2136 Known.One.lshrInPlace(ShAmt);
2137
2138// If the input sign bit is known to be zero, or if none of the top bits
2139// are demanded, turn this into an unsigned shift right.
2140if (Known.Zero[BitWidth - ShAmt - 1] ||
2141DemandedBits.countl_zero() >= ShAmt) {
2142SDNodeFlags Flags;
2143 Flags.setExact(Op->getFlags().hasExact());
2144return TLO.CombineTo(
2145Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2146 }
2147
2148intLog2 =DemandedBits.exactLogBase2();
2149if (Log2 >= 0) {
2150// The bit must come from the sign.
2151SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 -Log2, dl, ShiftVT);
2152return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2153 }
2154
2155if (Known.One[BitWidth - ShAmt - 1])
2156// New bits are known one.
2157 Known.One.setHighBits(ShAmt);
2158
2159// Attempt to avoid multi-use ops if we don't need anything from them.
2160if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2161SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2162 Op0, InDemandedMask, DemandedElts, TLO.DAG,Depth + 1);
2163if (DemandedOp0) {
2164SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2165return TLO.CombineTo(Op, NewOp);
2166 }
2167 }
2168 }
2169
2170// Try to match AVG patterns (after shift simplification).
2171if (SDValue AVG =combineShiftToAVG(Op, TLO, *this,DemandedBits,
2172 DemandedElts,Depth + 1))
2173return TLO.CombineTo(Op, AVG);
2174
2175break;
2176 }
2177caseISD::FSHL:
2178caseISD::FSHR: {
2179SDValue Op0 =Op.getOperand(0);
2180SDValue Op1 =Op.getOperand(1);
2181SDValue Op2 =Op.getOperand(2);
2182bool IsFSHL = (Op.getOpcode() ==ISD::FSHL);
2183
2184if (ConstantSDNode *SA =isConstOrConstSplat(Op2, DemandedElts)) {
2185unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2186
2187// For fshl, 0-shift returns the 1st arg.
2188// For fshr, 0-shift returns the 2nd arg.
2189if (Amt == 0) {
2190if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1,DemandedBits, DemandedElts,
2191 Known, TLO,Depth + 1))
2192returntrue;
2193break;
2194 }
2195
2196// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2197// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2198APInt Demanded0 =DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2199APInt Demanded1 =DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2200if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2201Depth + 1))
2202returntrue;
2203if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2204Depth + 1))
2205returntrue;
2206
2207 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2208 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2209 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2210 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2211 Known = Known.unionWith(Known2);
2212
2213// Attempt to avoid multi-use ops if we don't need anything from them.
2214if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2215 !DemandedElts.isAllOnes()) {
2216SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2217 Op0, Demanded0, DemandedElts, TLO.DAG,Depth + 1);
2218SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2219 Op1, Demanded1, DemandedElts, TLO.DAG,Depth + 1);
2220if (DemandedOp0 || DemandedOp1) {
2221 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2222 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2223SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2224 DemandedOp1, Op2);
2225return TLO.CombineTo(Op, NewOp);
2226 }
2227 }
2228 }
2229
2230// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2231if (isPowerOf2_32(BitWidth)) {
2232APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(),BitWidth - 1);
2233if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2234 Known2, TLO,Depth + 1))
2235returntrue;
2236 }
2237break;
2238 }
2239caseISD::ROTL:
2240caseISD::ROTR: {
2241SDValue Op0 =Op.getOperand(0);
2242SDValue Op1 =Op.getOperand(1);
2243bool IsROTL = (Op.getOpcode() ==ISD::ROTL);
2244
2245// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2246if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1))
2247return TLO.CombineTo(Op, Op0);
2248
2249if (ConstantSDNode *SA =isConstOrConstSplat(Op1, DemandedElts)) {
2250unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2251unsigned RevAmt =BitWidth - Amt;
2252
2253// rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2254// rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2255APInt Demanded0 =DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2256if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2257Depth + 1))
2258returntrue;
2259
2260// rot*(x, 0) --> x
2261if (Amt == 0)
2262return TLO.CombineTo(Op, Op0);
2263
2264// See if we don't demand either half of the rotated bits.
2265if ((!TLO.LegalOperations() ||isOperationLegal(ISD::SHL, VT)) &&
2266DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2267 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2268return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2269 }
2270if ((!TLO.LegalOperations() ||isOperationLegal(ISD::SRL, VT)) &&
2271DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2272 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2273return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2274 }
2275 }
2276
2277// For pow-2 bitwidths we only demand the bottom modulo amt bits.
2278if (isPowerOf2_32(BitWidth)) {
2279APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(),BitWidth - 1);
2280if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2281Depth + 1))
2282returntrue;
2283 }
2284break;
2285 }
2286caseISD::SMIN:
2287caseISD::SMAX:
2288caseISD::UMIN:
2289caseISD::UMAX: {
2290unsigned Opc =Op.getOpcode();
2291SDValue Op0 =Op.getOperand(0);
2292SDValue Op1 =Op.getOperand(1);
2293
2294// If we're only demanding signbits, then we can simplify to OR/AND node.
2295unsigned BitOp =
2296 (Opc ==ISD::SMIN || Opc ==ISD::UMAX) ?ISD::OR :ISD::AND;
2297unsigned NumSignBits =
2298 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts,Depth + 1),
2299 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts,Depth + 1));
2300unsigned NumDemandedUpperBits =BitWidth -DemandedBits.countr_zero();
2301if (NumSignBits >= NumDemandedUpperBits)
2302return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp,SDLoc(Op), VT, Op0, Op1));
2303
2304// Check if one arg is always less/greater than (or equal) to the other arg.
2305KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts,Depth + 1);
2306KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts,Depth + 1);
2307switch (Opc) {
2308caseISD::SMIN:
2309if (std::optional<bool> IsSLE =KnownBits::sle(Known0, Known1))
2310return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2311if (std::optional<bool> IsSLT =KnownBits::slt(Known0, Known1))
2312return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2313 Known =KnownBits::smin(Known0, Known1);
2314break;
2315caseISD::SMAX:
2316if (std::optional<bool> IsSGE =KnownBits::sge(Known0, Known1))
2317return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2318if (std::optional<bool> IsSGT =KnownBits::sgt(Known0, Known1))
2319return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2320 Known =KnownBits::smax(Known0, Known1);
2321break;
2322caseISD::UMIN:
2323if (std::optional<bool> IsULE =KnownBits::ule(Known0, Known1))
2324return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2325if (std::optional<bool> IsULT =KnownBits::ult(Known0, Known1))
2326return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2327 Known =KnownBits::umin(Known0, Known1);
2328break;
2329caseISD::UMAX:
2330if (std::optional<bool> IsUGE =KnownBits::uge(Known0, Known1))
2331return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2332if (std::optional<bool> IsUGT =KnownBits::ugt(Known0, Known1))
2333return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2334 Known =KnownBits::umax(Known0, Known1);
2335break;
2336 }
2337break;
2338 }
2339caseISD::BITREVERSE: {
2340SDValue Src =Op.getOperand(0);
2341APInt DemandedSrcBits =DemandedBits.reverseBits();
2342if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2343Depth + 1))
2344returntrue;
2345 Known.One = Known2.One.reverseBits();
2346 Known.Zero = Known2.Zero.reverseBits();
2347break;
2348 }
2349caseISD::BSWAP: {
2350SDValue Src =Op.getOperand(0);
2351
2352// If the only bits demanded come from one byte of the bswap result,
2353// just shift the input byte into position to eliminate the bswap.
2354unsigned NLZ =DemandedBits.countl_zero();
2355unsigned NTZ =DemandedBits.countr_zero();
2356
2357// Round NTZ down to the next byte. If we have 11 trailing zeros, then
2358// we need all the bits down to bit 8. Likewise, round NLZ. If we
2359// have 14 leading zeros, round to 8.
2360 NLZ =alignDown(NLZ, 8);
2361 NTZ =alignDown(NTZ, 8);
2362// If we need exactly one byte, we can do this transformation.
2363if (BitWidth - NLZ - NTZ == 8) {
2364// Replace this with either a left or right shift to get the byte into
2365// the right place.
2366unsigned ShiftOpcode = NLZ > NTZ ?ISD::SRL :ISD::SHL;
2367if (!TLO.LegalOperations() ||isOperationLegal(ShiftOpcode, VT)) {
2368unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2369SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2370SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2371return TLO.CombineTo(Op, NewOp);
2372 }
2373 }
2374
2375APInt DemandedSrcBits =DemandedBits.byteSwap();
2376if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2377Depth + 1))
2378returntrue;
2379 Known.One = Known2.One.byteSwap();
2380 Known.Zero = Known2.Zero.byteSwap();
2381break;
2382 }
2383caseISD::CTPOP: {
2384// If only 1 bit is demanded, replace with PARITY as long as we're before
2385// op legalization.
2386// FIXME: Limit to scalars for now.
2387if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2388return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2389Op.getOperand(0)));
2390
2391 Known = TLO.DAG.computeKnownBits(Op, DemandedElts,Depth);
2392break;
2393 }
2394caseISD::SIGN_EXTEND_INREG: {
2395SDValue Op0 =Op.getOperand(0);
2396EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2397unsigned ExVTBits = ExVT.getScalarSizeInBits();
2398
2399// If we only care about the highest bit, don't bother shifting right.
2400if (DemandedBits.isSignMask()) {
2401unsigned MinSignedBits =
2402 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts,Depth + 1);
2403bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2404// However if the input is already sign extended we expect the sign
2405// extension to be dropped altogether later and do not simplify.
2406if (!AlreadySignExtended) {
2407// Compute the correct shift amount type, which must be getShiftAmountTy
2408// for scalar types after legalization.
2409SDValue ShiftAmt =
2410 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2411return TLO.CombineTo(Op,
2412 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2413 }
2414 }
2415
2416// If none of the extended bits are demanded, eliminate the sextinreg.
2417if (DemandedBits.getActiveBits() <= ExVTBits)
2418return TLO.CombineTo(Op, Op0);
2419
2420APInt InputDemandedBits =DemandedBits.getLoBits(ExVTBits);
2421
2422// Since the sign extended bits are demanded, we know that the sign
2423// bit is demanded.
2424 InputDemandedBits.setBit(ExVTBits - 1);
2425
2426if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2427Depth + 1))
2428returntrue;
2429
2430// If the sign bit of the input is known set or clear, then we know the
2431// top bits of the result.
2432
2433// If the input sign bit is known zero, convert this into a zero extension.
2434if (Known.Zero[ExVTBits - 1])
2435return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2436
2437APInt Mask =APInt::getLowBitsSet(BitWidth, ExVTBits);
2438if (Known.One[ExVTBits - 1]) {// Input sign bit known set
2439 Known.One.setBitsFrom(ExVTBits);
2440 Known.Zero &= Mask;
2441 }else {// Input sign bit unknown
2442 Known.Zero &= Mask;
2443 Known.One &= Mask;
2444 }
2445break;
2446 }
2447caseISD::BUILD_PAIR: {
2448EVT HalfVT =Op.getOperand(0).getValueType();
2449unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2450
2451APInt MaskLo =DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2452APInt MaskHi =DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2453
2454KnownBits KnownLo, KnownHi;
2455
2456if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO,Depth + 1))
2457returntrue;
2458
2459if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO,Depth + 1))
2460returntrue;
2461
2462 Known = KnownHi.concat(KnownLo);
2463break;
2464 }
2465caseISD::ZERO_EXTEND_VECTOR_INREG:
2466if (VT.isScalableVector())
2467returnfalse;
2468 [[fallthrough]];
2469caseISD::ZERO_EXTEND: {
2470SDValue Src =Op.getOperand(0);
2471EVT SrcVT = Src.getValueType();
2472unsigned InBits = SrcVT.getScalarSizeInBits();
2473unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2474bool IsVecInReg =Op.getOpcode() ==ISD::ZERO_EXTEND_VECTOR_INREG;
2475
2476// If none of the top bits are demanded, convert this into an any_extend.
2477if (DemandedBits.getActiveBits() <= InBits) {
2478// If we only need the non-extended bits of the bottom element
2479// then we can just bitcast to the result.
2480if (IsLE && IsVecInReg && DemandedElts == 1 &&
2481 VT.getSizeInBits() == SrcVT.getSizeInBits())
2482return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2483
2484unsigned Opc =
2485 IsVecInReg ?ISD::ANY_EXTEND_VECTOR_INREG :ISD::ANY_EXTEND;
2486if (!TLO.LegalOperations() ||isOperationLegal(Opc, VT))
2487return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2488 }
2489
2490APInt InDemandedBits =DemandedBits.trunc(InBits);
2491APInt InDemandedElts = DemandedElts.zext(InElts);
2492if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2493Depth + 1)) {
2494Op->dropFlags(SDNodeFlags::NonNeg);
2495returntrue;
2496 }
2497assert(Known.getBitWidth() == InBits &&"Src width has changed?");
2498 Known = Known.zext(BitWidth);
2499
2500// Attempt to avoid multi-use ops if we don't need anything from them.
2501if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502 Src, InDemandedBits, InDemandedElts, TLO.DAG,Depth + 1))
2503return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2504break;
2505 }
2506caseISD::SIGN_EXTEND_VECTOR_INREG:
2507if (VT.isScalableVector())
2508returnfalse;
2509 [[fallthrough]];
2510caseISD::SIGN_EXTEND: {
2511SDValue Src =Op.getOperand(0);
2512EVT SrcVT = Src.getValueType();
2513unsigned InBits = SrcVT.getScalarSizeInBits();
2514unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2515bool IsVecInReg =Op.getOpcode() ==ISD::SIGN_EXTEND_VECTOR_INREG;
2516
2517APInt InDemandedElts = DemandedElts.zext(InElts);
2518APInt InDemandedBits =DemandedBits.trunc(InBits);
2519
2520// Since some of the sign extended bits are demanded, we know that the sign
2521// bit is demanded.
2522 InDemandedBits.setBit(InBits - 1);
2523
2524// If none of the top bits are demanded, convert this into an any_extend.
2525if (DemandedBits.getActiveBits() <= InBits) {
2526// If we only need the non-extended bits of the bottom element
2527// then we can just bitcast to the result.
2528if (IsLE && IsVecInReg && DemandedElts == 1 &&
2529 VT.getSizeInBits() == SrcVT.getSizeInBits())
2530return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2531
2532// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2533if (getBooleanContents(VT) !=ZeroOrNegativeOneBooleanContent ||
2534 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts,Depth + 1) !=
2535 InBits) {
2536unsigned Opc =
2537 IsVecInReg ?ISD::ANY_EXTEND_VECTOR_INREG :ISD::ANY_EXTEND;
2538if (!TLO.LegalOperations() ||isOperationLegal(Opc, VT))
2539return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2540 }
2541 }
2542
2543if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2544Depth + 1))
2545returntrue;
2546assert(Known.getBitWidth() == InBits &&"Src width has changed?");
2547
2548// If the sign bit is known one, the top bits match.
2549 Known = Known.sext(BitWidth);
2550
2551// If the sign bit is known zero, convert this to a zero extend.
2552if (Known.isNonNegative()) {
2553unsigned Opc =
2554 IsVecInReg ?ISD::ZERO_EXTEND_VECTOR_INREG :ISD::ZERO_EXTEND;
2555if (!TLO.LegalOperations() ||isOperationLegal(Opc, VT)) {
2556SDNodeFlags Flags;
2557if (!IsVecInReg)
2558 Flags |=SDNodeFlags::NonNeg;
2559return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2560 }
2561 }
2562
2563// Attempt to avoid multi-use ops if we don't need anything from them.
2564if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2565 Src, InDemandedBits, InDemandedElts, TLO.DAG,Depth + 1))
2566return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2567break;
2568 }
2569caseISD::ANY_EXTEND_VECTOR_INREG:
2570if (VT.isScalableVector())
2571returnfalse;
2572 [[fallthrough]];
2573caseISD::ANY_EXTEND: {
2574SDValue Src =Op.getOperand(0);
2575EVT SrcVT = Src.getValueType();
2576unsigned InBits = SrcVT.getScalarSizeInBits();
2577unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2578bool IsVecInReg =Op.getOpcode() ==ISD::ANY_EXTEND_VECTOR_INREG;
2579
2580// If we only need the bottom element then we can just bitcast.
2581// TODO: Handle ANY_EXTEND?
2582if (IsLE && IsVecInReg && DemandedElts == 1 &&
2583 VT.getSizeInBits() == SrcVT.getSizeInBits())
2584return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2585
2586APInt InDemandedBits =DemandedBits.trunc(InBits);
2587APInt InDemandedElts = DemandedElts.zext(InElts);
2588if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2589Depth + 1))
2590returntrue;
2591assert(Known.getBitWidth() == InBits &&"Src width has changed?");
2592 Known = Known.anyext(BitWidth);
2593
2594// Attempt to avoid multi-use ops if we don't need anything from them.
2595if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2596 Src, InDemandedBits, InDemandedElts, TLO.DAG,Depth + 1))
2597return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2598break;
2599 }
2600caseISD::TRUNCATE: {
2601SDValue Src =Op.getOperand(0);
2602
2603// Simplify the input, using demanded bit information, and compute the known
2604// zero/one bits live out.
2605unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2606APInt TruncMask =DemandedBits.zext(OperandBitWidth);
2607if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2608Depth + 1)) {
2609// Disable the nsw and nuw flags. We can no longer guarantee that we
2610// won't wrap after simplification.
2611Op->dropFlags(SDNodeFlags::NoWrap);
2612returntrue;
2613 }
2614 Known = Known.trunc(BitWidth);
2615
2616// Attempt to avoid multi-use ops if we don't need anything from them.
2617if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2618 Src, TruncMask, DemandedElts, TLO.DAG,Depth + 1))
2619return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2620
2621// If the input is only used by this truncate, see if we can shrink it based
2622// on the known demanded bits.
2623switch (Src.getOpcode()) {
2624default:
2625break;
2626caseISD::SRL:
2627// Shrink SRL by a constant if none of the high bits shifted in are
2628// demanded.
2629if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2630// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2631// undesirable.
2632break;
2633
2634if (Src.getNode()->hasOneUse()) {
2635if (isTruncateFree(Src, VT) &&
2636 !isTruncateFree(Src.getValueType(), VT)) {
2637// If truncate is only free at trunc(srl), do not turn it into
2638// srl(trunc). The check is done by first check the truncate is free
2639// at Src's opcode(srl), then check the truncate is not done by
2640// referencing sub-register. In test, if both trunc(srl) and
2641// srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2642// trunc(srl)'s trunc is free, trunc(srl) is better.
2643break;
2644 }
2645
2646 std::optional<uint64_t> ShAmtC =
2647 TLO.DAG.getValidShiftAmount(Src, DemandedElts,Depth + 2);
2648if (!ShAmtC || *ShAmtC >=BitWidth)
2649break;
2650uint64_t ShVal = *ShAmtC;
2651
2652APInt HighBits =
2653APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth -BitWidth);
2654 HighBits.lshrInPlace(ShVal);
2655 HighBits = HighBits.trunc(BitWidth);
2656if (!(HighBits &DemandedBits)) {
2657// None of the shifted in bits are needed. Add a truncate of the
2658// shift input, then shift it.
2659SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2660SDValue NewTrunc =
2661 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2662return TLO.CombineTo(
2663Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2664 }
2665 }
2666break;
2667 }
2668
2669break;
2670 }
2671caseISD::AssertZext: {
2672// AssertZext demands all of the high bits, plus any of the low bits
2673// demanded by its users.
2674EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2675APInt InMask =APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2676if (SimplifyDemandedBits(Op.getOperand(0), ~InMask |DemandedBits, Known,
2677 TLO,Depth + 1))
2678returntrue;
2679
2680 Known.Zero |= ~InMask;
2681 Known.One &= (~Known.Zero);
2682break;
2683 }
2684caseISD::EXTRACT_VECTOR_ELT: {
2685SDValue Src =Op.getOperand(0);
2686SDValueIdx =Op.getOperand(1);
2687ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2688unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2689
2690if (SrcEltCnt.isScalable())
2691returnfalse;
2692
2693// Demand the bits from every vector element without a constant index.
2694unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2695APInt DemandedSrcElts =APInt::getAllOnes(NumSrcElts);
2696if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2697if (CIdx->getAPIntValue().ult(NumSrcElts))
2698 DemandedSrcElts =APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2699
2700// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2701// anything about the extended bits.
2702APInt DemandedSrcBits =DemandedBits;
2703if (BitWidth > EltBitWidth)
2704 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2705
2706if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2707Depth + 1))
2708returntrue;
2709
2710// Attempt to avoid multi-use ops if we don't need anything from them.
2711if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2712if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2713 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG,Depth + 1)) {
2714SDValue NewOp =
2715 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,Idx);
2716return TLO.CombineTo(Op, NewOp);
2717 }
2718 }
2719
2720 Known = Known2;
2721if (BitWidth > EltBitWidth)
2722 Known = Known.anyext(BitWidth);
2723break;
2724 }
2725caseISD::BITCAST: {
2726if (VT.isScalableVector())
2727returnfalse;
2728SDValue Src =Op.getOperand(0);
2729EVT SrcVT = Src.getValueType();
2730unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2731
2732// If this is an FP->Int bitcast and if the sign bit is the only
2733// thing demanded, turn this into a FGETSIGN.
2734if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2735DemandedBits ==APInt::getSignMask(Op.getValueSizeInBits()) &&
2736 SrcVT.isFloatingPoint()) {
2737bool OpVTLegal =isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2738bool i32Legal =isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2739if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2740 SrcVT != MVT::f128) {
2741// Cannot eliminate/lower SHL for f128 yet.
2742EVT Ty = OpVTLegal ? VT : MVT::i32;
2743// Make a FGETSIGN + SHL to move the sign bit into the appropriate
2744// place. We expect the SHL to be eliminated by other optimizations.
2745SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2746unsigned OpVTSizeInBits =Op.getValueSizeInBits();
2747if (!OpVTLegal && OpVTSizeInBits > 32)
2748 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2749unsigned ShVal =Op.getValueSizeInBits() - 1;
2750SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2751return TLO.CombineTo(Op,
2752 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2753 }
2754 }
2755
2756// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2757// Demand the elt/bit if any of the original elts/bits are demanded.
2758if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2759unsigned Scale =BitWidth / NumSrcEltBits;
2760unsigned NumSrcElts = SrcVT.getVectorNumElements();
2761APInt DemandedSrcBits =APInt::getZero(NumSrcEltBits);
2762APInt DemandedSrcElts =APInt::getZero(NumSrcElts);
2763for (unsigned i = 0; i != Scale; ++i) {
2764unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2765unsigned BitOffset = EltOffset * NumSrcEltBits;
2766APInt Sub =DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2767if (!Sub.isZero()) {
2768 DemandedSrcBits |= Sub;
2769for (unsigned j = 0; j != NumElts; ++j)
2770if (DemandedElts[j])
2771 DemandedSrcElts.setBit((j * Scale) + i);
2772 }
2773 }
2774
2775APInt KnownSrcUndef, KnownSrcZero;
2776if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2777 KnownSrcZero, TLO,Depth + 1))
2778returntrue;
2779
2780KnownBits KnownSrcBits;
2781if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2782 KnownSrcBits, TLO,Depth + 1))
2783returntrue;
2784 }elseif (IsLE && (NumSrcEltBits %BitWidth) == 0) {
2785// TODO - bigendian once we have test coverage.
2786unsigned Scale = NumSrcEltBits /BitWidth;
2787unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2788APInt DemandedSrcBits =APInt::getZero(NumSrcEltBits);
2789APInt DemandedSrcElts =APInt::getZero(NumSrcElts);
2790for (unsigned i = 0; i != NumElts; ++i)
2791if (DemandedElts[i]) {
2792unsignedOffset = (i % Scale) *BitWidth;
2793 DemandedSrcBits.insertBits(DemandedBits,Offset);
2794 DemandedSrcElts.setBit(i / Scale);
2795 }
2796
2797if (SrcVT.isVector()) {
2798APInt KnownSrcUndef, KnownSrcZero;
2799if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2800 KnownSrcZero, TLO,Depth + 1))
2801returntrue;
2802 }
2803
2804KnownBits KnownSrcBits;
2805if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2806 KnownSrcBits, TLO,Depth + 1))
2807returntrue;
2808
2809// Attempt to avoid multi-use ops if we don't need anything from them.
2810if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2811if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2812 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG,Depth + 1)) {
2813SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2814return TLO.CombineTo(Op, NewOp);
2815 }
2816 }
2817 }
2818
2819// If this is a bitcast, let computeKnownBits handle it. Only do this on a
2820// recursive call where Known may be useful to the caller.
2821if (Depth > 0) {
2822 Known = TLO.DAG.computeKnownBits(Op, DemandedElts,Depth);
2823returnfalse;
2824 }
2825break;
2826 }
2827caseISD::MUL:
2828if (DemandedBits.isPowerOf2()) {
2829// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2830// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2831// odd (has LSB set), then the left-shifted low bit of X is the answer.
2832unsigned CTZ =DemandedBits.countr_zero();
2833ConstantSDNode *C =isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2834if (C &&C->getAPIntValue().countr_zero() == CTZ) {
2835SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2836SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT,Op.getOperand(0), AmtC);
2837return TLO.CombineTo(Op, Shl);
2838 }
2839 }
2840// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2841// X * X is odd iff X is odd.
2842// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2843if (Op.getOperand(0) ==Op.getOperand(1) &&DemandedBits.ult(4)) {
2844SDValue One = TLO.DAG.getConstant(1, dl, VT);
2845SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT,Op.getOperand(0), One);
2846return TLO.CombineTo(Op, And1);
2847 }
2848 [[fallthrough]];
2849caseISD::ADD:
2850caseISD::SUB: {
2851// Add, Sub, and Mul don't demand any bits in positions beyond that
2852// of the highest bit demanded of them.
2853SDValue Op0 =Op.getOperand(0), Op1 =Op.getOperand(1);
2854SDNodeFlags Flags =Op.getNode()->getFlags();
2855unsigned DemandedBitsLZ =DemandedBits.countl_zero();
2856APInt LoMask =APInt::getLowBitsSet(BitWidth,BitWidth - DemandedBitsLZ);
2857KnownBits KnownOp0, KnownOp1;
2858auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2859constKnownBits &KnownRHS) {
2860if (Op.getOpcode() ==ISD::MUL)
2861 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2862return Demanded;
2863 };
2864if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2865Depth + 1) ||
2866 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2867 DemandedElts, KnownOp0, TLO,Depth + 1) ||
2868// See if the operation should be performed at a smaller bit width.
2869 ShrinkDemandedOp(Op,BitWidth,DemandedBits, TLO)) {
2870// Disable the nsw and nuw flags. We can no longer guarantee that we
2871// won't wrap after simplification.
2872Op->dropFlags(SDNodeFlags::NoWrap);
2873returntrue;
2874 }
2875
2876// neg x with only low bit demanded is simply x.
2877if (Op.getOpcode() ==ISD::SUB &&DemandedBits.isOne() &&
2878isNullConstant(Op0))
2879return TLO.CombineTo(Op, Op1);
2880
2881// Attempt to avoid multi-use ops if we don't need anything from them.
2882if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2883SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2884 Op0, LoMask, DemandedElts, TLO.DAG,Depth + 1);
2885SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2886 Op1, LoMask, DemandedElts, TLO.DAG,Depth + 1);
2887if (DemandedOp0 || DemandedOp1) {
2888 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2889 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2890SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2891 Flags & ~SDNodeFlags::NoWrap);
2892return TLO.CombineTo(Op, NewOp);
2893 }
2894 }
2895
2896// If we have a constant operand, we may be able to turn it into -1 if we
2897// do not demand the high bits. This can make the constant smaller to
2898// encode, allow more general folding, or match specialized instruction
2899// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2900// is probably not useful (and could be detrimental).
2901ConstantSDNode *C =isConstOrConstSplat(Op1);
2902APInt HighMask =APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2903if (C && !C->isAllOnes() && !C->isOne() &&
2904 (C->getAPIntValue() | HighMask).isAllOnes()) {
2905SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2906// Disable the nsw and nuw flags. We can no longer guarantee that we
2907// won't wrap after simplification.
2908SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2909 Flags & ~SDNodeFlags::NoWrap);
2910return TLO.CombineTo(Op, NewOp);
2911 }
2912
2913// Match a multiply with a disguised negated-power-of-2 and convert to a
2914// an equivalent shift-left amount.
2915// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2916auto getShiftLeftAmt = [&HighMask](SDValueMul) ->unsigned {
2917if (Mul.getOpcode() !=ISD::MUL || !Mul.hasOneUse())
2918return 0;
2919
2920// Don't touch opaque constants. Also, ignore zero and power-of-2
2921// multiplies. Those will get folded later.
2922ConstantSDNode *MulC =isConstOrConstSplat(Mul.getOperand(1));
2923if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2924 !MulC->getAPIntValue().isPowerOf2()) {
2925APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2926if (UnmaskedC.isNegatedPowerOf2())
2927return (-UnmaskedC).logBase2();
2928 }
2929return 0;
2930 };
2931
2932auto foldMul = [&](ISD::NodeType NT,SDValueX,SDValueY,
2933unsigned ShlAmt) {
2934SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2935SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT,X, ShlAmtC);
2936SDValue Res = TLO.DAG.getNode(NT, dl, VT,Y, Shl);
2937return TLO.CombineTo(Op, Res);
2938 };
2939
2940if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2941if (Op.getOpcode() ==ISD::ADD) {
2942// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2943if (unsigned ShAmt = getShiftLeftAmt(Op0))
2944return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2945// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2946if (unsigned ShAmt = getShiftLeftAmt(Op1))
2947return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2948 }
2949if (Op.getOpcode() ==ISD::SUB) {
2950// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2951if (unsigned ShAmt = getShiftLeftAmt(Op1))
2952return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2953 }
2954 }
2955
2956if (Op.getOpcode() ==ISD::MUL) {
2957 Known =KnownBits::mul(KnownOp0, KnownOp1);
2958 }else {// Op.getOpcode() is either ISD::ADD or ISD::SUB.
2959 Known =KnownBits::computeForAddSub(
2960Op.getOpcode() ==ISD::ADD, Flags.hasNoSignedWrap(),
2961 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2962 }
2963break;
2964 }
2965default:
2966// We also ask the target about intrinsics (which could be specific to it).
2967if (Op.getOpcode() >=ISD::BUILTIN_OP_END ||
2968Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN) {
2969// TODO: Probably okay to remove after audit; here to reduce change size
2970// in initial enablement patch for scalable vectors
2971if (Op.getValueType().isScalableVector())
2972break;
2973if (SimplifyDemandedBitsForTargetNode(Op,DemandedBits, DemandedElts,
2974 Known, TLO,Depth))
2975returntrue;
2976break;
2977 }
2978
2979// Just use computeKnownBits to compute output bits.
2980 Known = TLO.DAG.computeKnownBits(Op, DemandedElts,Depth);
2981break;
2982 }
2983
2984// If we know the value of all of the demanded bits, return this as a
2985// constant.
2986if (!isTargetCanonicalConstantNode(Op) &&
2987DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2988// Avoid folding to a constant if any OpaqueConstant is involved.
2989if (llvm::any_of(Op->ops(), [](SDValue V) {
2990 auto *C = dyn_cast<ConstantSDNode>(V);
2991 return C && C->isOpaque();
2992 }))
2993returnfalse;
2994if (VT.isInteger())
2995return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2996if (VT.isFloatingPoint())
2997return TLO.CombineTo(
2998Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2999 dl, VT));
3000 }
3001
3002// A multi use 'all demanded elts' simplify failed to find any knownbits.
3003// Try again just for the original demanded elts.
3004// Ensure we do this AFTER constant folding above.
3005if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3006 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts,Depth);
3007
3008returnfalse;
3009}
3010
3011boolTargetLowering::SimplifyDemandedVectorElts(SDValueOp,
3012constAPInt &DemandedElts,
3013DAGCombinerInfo &DCI) const{
3014SelectionDAG &DAG = DCI.DAG;
3015TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3016 !DCI.isBeforeLegalizeOps());
3017
3018APInt KnownUndef, KnownZero;
3019bool Simplified =
3020 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3021if (Simplified) {
3022 DCI.AddToWorklist(Op.getNode());
3023 DCI.CommitTargetLoweringOpt(TLO);
3024 }
3025
3026return Simplified;
3027}
3028
3029/// Given a vector binary operation and known undefined elements for each input
3030/// operand, compute whether each element of the output is undefined.
3031staticAPIntgetKnownUndefForVectorBinop(SDValue BO,SelectionDAG &DAG,
3032constAPInt &UndefOp0,
3033constAPInt &UndefOp1) {
3034EVT VT = BO.getValueType();
3035assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3036"Vector binop only");
3037
3038EVT EltVT = VT.getVectorElementType();
3039unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3040assert(UndefOp0.getBitWidth() == NumElts &&
3041 UndefOp1.getBitWidth() == NumElts &&"Bad type for undef analysis");
3042
3043auto getUndefOrConstantElt = [&](SDValue V,unsigned Index,
3044constAPInt &UndefVals) {
3045if (UndefVals[Index])
3046return DAG.getUNDEF(EltVT);
3047
3048if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3049// Try hard to make sure that the getNode() call is not creating temporary
3050// nodes. Ignore opaque integers because they do not constant fold.
3051SDValue Elt = BV->getOperand(Index);
3052auto *C = dyn_cast<ConstantSDNode>(Elt);
3053if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3054return Elt;
3055 }
3056
3057returnSDValue();
3058 };
3059
3060APInt KnownUndef =APInt::getZero(NumElts);
3061for (unsigned i = 0; i != NumElts; ++i) {
3062// If both inputs for this element are either constant or undef and match
3063// the element type, compute the constant/undef result for this element of
3064// the vector.
3065// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3066// not handle FP constants. The code within getNode() should be refactored
3067// to avoid the danger of creating a bogus temporary node here.
3068SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3069SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3070if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3071if (DAG.getNode(BO.getOpcode(),SDLoc(BO), EltVT, C0, C1).isUndef())
3072 KnownUndef.setBit(i);
3073 }
3074return KnownUndef;
3075}
3076
3077boolTargetLowering::SimplifyDemandedVectorElts(
3078SDValueOp,constAPInt &OriginalDemandedElts,APInt &KnownUndef,
3079APInt &KnownZero,TargetLoweringOpt &TLO,unsignedDepth,
3080bool AssumeSingleUse) const{
3081EVT VT =Op.getValueType();
3082unsigned Opcode =Op.getOpcode();
3083APInt DemandedElts = OriginalDemandedElts;
3084unsigned NumElts = DemandedElts.getBitWidth();
3085assert(VT.isVector() &&"Expected vector op");
3086
3087 KnownUndef = KnownZero =APInt::getZero(NumElts);
3088
3089if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3090returnfalse;
3091
3092// TODO: For now we assume we know nothing about scalable vectors.
3093if (VT.isScalableVector())
3094returnfalse;
3095
3096assert(VT.getVectorNumElements() == NumElts &&
3097"Mask size mismatches value type element count!");
3098
3099// Undef operand.
3100if (Op.isUndef()) {
3101 KnownUndef.setAllBits();
3102returnfalse;
3103 }
3104
3105// If Op has other users, assume that all elements are needed.
3106if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3107 DemandedElts.setAllBits();
3108
3109// Not demanding any elements from Op.
3110if (DemandedElts == 0) {
3111 KnownUndef.setAllBits();
3112return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3113 }
3114
3115// Limit search depth.
3116if (Depth >=SelectionDAG::MaxRecursionDepth)
3117returnfalse;
3118
3119SDLocDL(Op);
3120unsigned EltSizeInBits = VT.getScalarSizeInBits();
3121bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3122
3123// Helper for demanding the specified elements and all the bits of both binary
3124// operands.
3125auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0,SDValue Op1) {
3126SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3127 TLO.DAG,Depth + 1);
3128SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3129 TLO.DAG,Depth + 1);
3130if (NewOp0 || NewOp1) {
3131SDValue NewOp =
3132 TLO.DAG.getNode(Opcode,SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3133 NewOp1 ? NewOp1 : Op1,Op->getFlags());
3134return TLO.CombineTo(Op, NewOp);
3135 }
3136returnfalse;
3137 };
3138
3139switch (Opcode) {
3140caseISD::SCALAR_TO_VECTOR: {
3141if (!DemandedElts[0]) {
3142 KnownUndef.setAllBits();
3143return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3144 }
3145SDValue ScalarSrc =Op.getOperand(0);
3146if (ScalarSrc.getOpcode() ==ISD::EXTRACT_VECTOR_ELT) {
3147SDValue Src = ScalarSrc.getOperand(0);
3148SDValueIdx = ScalarSrc.getOperand(1);
3149EVT SrcVT = Src.getValueType();
3150
3151ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3152
3153if (SrcEltCnt.isScalable())
3154returnfalse;
3155
3156unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3157if (isNullConstant(Idx)) {
3158APInt SrcDemandedElts =APInt::getOneBitSet(NumSrcElts, 0);
3159APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3160APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3161if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3162 TLO,Depth + 1))
3163returntrue;
3164 }
3165 }
3166 KnownUndef.setHighBits(NumElts - 1);
3167break;
3168 }
3169caseISD::BITCAST: {
3170SDValue Src =Op.getOperand(0);
3171EVT SrcVT = Src.getValueType();
3172
3173// We only handle vectors here.
3174// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3175if (!SrcVT.isVector())
3176break;
3177
3178// Fast handling of 'identity' bitcasts.
3179unsigned NumSrcElts = SrcVT.getVectorNumElements();
3180if (NumSrcElts == NumElts)
3181return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3182 KnownZero, TLO,Depth + 1);
3183
3184APInt SrcDemandedElts, SrcZero, SrcUndef;
3185
3186// Bitcast from 'large element' src vector to 'small element' vector, we
3187// must demand a source element if any DemandedElt maps to it.
3188if ((NumElts % NumSrcElts) == 0) {
3189unsigned Scale = NumElts / NumSrcElts;
3190 SrcDemandedElts =APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3191if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3192 TLO,Depth + 1))
3193returntrue;
3194
3195// Try calling SimplifyDemandedBits, converting demanded elts to the bits
3196// of the large element.
3197// TODO - bigendian once we have test coverage.
3198if (IsLE) {
3199unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3200APInt SrcDemandedBits =APInt::getZero(SrcEltSizeInBits);
3201for (unsigned i = 0; i != NumElts; ++i)
3202if (DemandedElts[i]) {
3203unsigned Ofs = (i % Scale) * EltSizeInBits;
3204 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3205 }
3206
3207KnownBits Known;
3208if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3209 TLO,Depth + 1))
3210returntrue;
3211
3212// The bitcast has split each wide element into a number of
3213// narrow subelements. We have just computed the Known bits
3214// for wide elements. See if element splitting results in
3215// some subelements being zero. Only for demanded elements!
3216for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3217if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3218 .isAllOnes())
3219continue;
3220for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3221unsigned Elt = Scale * SrcElt + SubElt;
3222if (DemandedElts[Elt])
3223 KnownZero.setBit(Elt);
3224 }
3225 }
3226 }
3227
3228// If the src element is zero/undef then all the output elements will be -
3229// only demanded elements are guaranteed to be correct.
3230for (unsigned i = 0; i != NumSrcElts; ++i) {
3231if (SrcDemandedElts[i]) {
3232if (SrcZero[i])
3233 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3234if (SrcUndef[i])
3235 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3236 }
3237 }
3238 }
3239
3240// Bitcast from 'small element' src vector to 'large element' vector, we
3241// demand all smaller source elements covered by the larger demanded element
3242// of this vector.
3243if ((NumSrcElts % NumElts) == 0) {
3244unsigned Scale = NumSrcElts / NumElts;
3245 SrcDemandedElts =APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3246if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3247 TLO,Depth + 1))
3248returntrue;
3249
3250// If all the src elements covering an output element are zero/undef, then
3251// the output element will be as well, assuming it was demanded.
3252for (unsigned i = 0; i != NumElts; ++i) {
3253if (DemandedElts[i]) {
3254if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3255 KnownZero.setBit(i);
3256if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3257 KnownUndef.setBit(i);
3258 }
3259 }
3260 }
3261break;
3262 }
3263caseISD::FREEZE: {
3264SDValue N0 =Op.getOperand(0);
3265if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3266/*PoisonOnly=*/false))
3267return TLO.CombineTo(Op, N0);
3268
3269// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3270// freeze(op(x, ...)) -> op(freeze(x), ...).
3271if (N0.getOpcode() ==ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3272return TLO.CombineTo(
3273Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR,DL, VT,
3274 TLO.DAG.getFreeze(N0.getOperand(0))));
3275break;
3276 }
3277caseISD::BUILD_VECTOR: {
3278// Check all elements and simplify any unused elements with UNDEF.
3279if (!DemandedElts.isAllOnes()) {
3280// Don't simplify BROADCASTS.
3281if (llvm::any_of(Op->op_values(),
3282 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3283SmallVector<SDValue, 32> Ops(Op->ops());
3284bool Updated =false;
3285for (unsigned i = 0; i != NumElts; ++i) {
3286if (!DemandedElts[i] && !Ops[i].isUndef()) {
3287 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3288 KnownUndef.setBit(i);
3289 Updated =true;
3290 }
3291 }
3292if (Updated)
3293return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT,DL, Ops));
3294 }
3295 }
3296for (unsigned i = 0; i != NumElts; ++i) {
3297SDValueSrcOp =Op.getOperand(i);
3298if (SrcOp.isUndef()) {
3299 KnownUndef.setBit(i);
3300 }elseif (EltSizeInBits ==SrcOp.getScalarValueSizeInBits() &&
3301 (isNullConstant(SrcOp) ||isNullFPConstant(SrcOp))) {
3302 KnownZero.setBit(i);
3303 }
3304 }
3305break;
3306 }
3307caseISD::CONCAT_VECTORS: {
3308EVT SubVT =Op.getOperand(0).getValueType();
3309unsigned NumSubVecs =Op.getNumOperands();
3310unsigned NumSubElts = SubVT.getVectorNumElements();
3311for (unsigned i = 0; i != NumSubVecs; ++i) {
3312SDValue SubOp =Op.getOperand(i);
3313APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3314APInt SubUndef, SubZero;
3315if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3316Depth + 1))
3317returntrue;
3318 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3319 KnownZero.insertBits(SubZero, i * NumSubElts);
3320 }
3321
3322// Attempt to avoid multi-use ops if we don't need anything from them.
3323if (!DemandedElts.isAllOnes()) {
3324bool FoundNewSub =false;
3325SmallVector<SDValue, 2> DemandedSubOps;
3326for (unsigned i = 0; i != NumSubVecs; ++i) {
3327SDValue SubOp =Op.getOperand(i);
3328APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3329SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3330 SubOp, SubElts, TLO.DAG,Depth + 1);
3331 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3332 FoundNewSub = NewSubOp ?true : FoundNewSub;
3333 }
3334if (FoundNewSub) {
3335SDValue NewOp =
3336 TLO.DAG.getNode(Op.getOpcode(),SDLoc(Op), VT, DemandedSubOps);
3337return TLO.CombineTo(Op, NewOp);
3338 }
3339 }
3340break;
3341 }
3342caseISD::INSERT_SUBVECTOR: {
3343// Demand any elements from the subvector and the remainder from the src its
3344// inserted into.
3345SDValue Src =Op.getOperand(0);
3346SDValue Sub =Op.getOperand(1);
3347uint64_tIdx =Op.getConstantOperandVal(2);
3348unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3349APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts,Idx);
3350APInt DemandedSrcElts = DemandedElts;
3351 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts),Idx);
3352
3353APInt SubUndef, SubZero;
3354if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3355Depth + 1))
3356returntrue;
3357
3358// If none of the src operand elements are demanded, replace it with undef.
3359if (!DemandedSrcElts && !Src.isUndef())
3360return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR,DL, VT,
3361 TLO.DAG.getUNDEF(VT), Sub,
3362Op.getOperand(2)));
3363
3364if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3365 TLO,Depth + 1))
3366returntrue;
3367 KnownUndef.insertBits(SubUndef,Idx);
3368 KnownZero.insertBits(SubZero,Idx);
3369
3370// Attempt to avoid multi-use ops if we don't need anything from them.
3371if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3372SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3373 Src, DemandedSrcElts, TLO.DAG,Depth + 1);
3374SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3375 Sub, DemandedSubElts, TLO.DAG,Depth + 1);
3376if (NewSrc || NewSub) {
3377 NewSrc = NewSrc ? NewSrc : Src;
3378 NewSub = NewSub ? NewSub : Sub;
3379SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(),SDLoc(Op), VT, NewSrc,
3380 NewSub,Op.getOperand(2));
3381return TLO.CombineTo(Op, NewOp);
3382 }
3383 }
3384break;
3385 }
3386caseISD::EXTRACT_SUBVECTOR: {
3387// Offset the demanded elts by the subvector index.
3388SDValue Src =Op.getOperand(0);
3389if (Src.getValueType().isScalableVector())
3390break;
3391uint64_tIdx =Op.getConstantOperandVal(1);
3392unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3393APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3394
3395APInt SrcUndef, SrcZero;
3396if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3397Depth + 1))
3398returntrue;
3399 KnownUndef = SrcUndef.extractBits(NumElts,Idx);
3400 KnownZero = SrcZero.extractBits(NumElts,Idx);
3401
3402// Attempt to avoid multi-use ops if we don't need anything from them.
3403if (!DemandedElts.isAllOnes()) {
3404SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3405 Src, DemandedSrcElts, TLO.DAG,Depth + 1);
3406if (NewSrc) {
3407SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(),SDLoc(Op), VT, NewSrc,
3408Op.getOperand(1));
3409return TLO.CombineTo(Op, NewOp);
3410 }
3411 }
3412break;
3413 }
3414caseISD::INSERT_VECTOR_ELT: {
3415SDValue Vec =Op.getOperand(0);
3416SDValue Scl =Op.getOperand(1);
3417auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3418
3419// For a legal, constant insertion index, if we don't need this insertion
3420// then strip it, else remove it from the demanded elts.
3421if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3422unsignedIdx = CIdx->getZExtValue();
3423if (!DemandedElts[Idx])
3424return TLO.CombineTo(Op, Vec);
3425
3426APInt DemandedVecElts(DemandedElts);
3427 DemandedVecElts.clearBit(Idx);
3428if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3429 KnownZero, TLO,Depth + 1))
3430returntrue;
3431
3432 KnownUndef.setBitVal(Idx, Scl.isUndef());
3433
3434 KnownZero.setBitVal(Idx,isNullConstant(Scl) ||isNullFPConstant(Scl));
3435break;
3436 }
3437
3438APInt VecUndef, VecZero;
3439if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3440Depth + 1))
3441returntrue;
3442// Without knowing the insertion index we can't set KnownUndef/KnownZero.
3443break;
3444 }
3445caseISD::VSELECT: {
3446SDValue Sel =Op.getOperand(0);
3447SDValueLHS =Op.getOperand(1);
3448SDValueRHS =Op.getOperand(2);
3449
3450// Try to transform the select condition based on the current demanded
3451// elements.
3452APInt UndefSel, ZeroSel;
3453if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3454Depth + 1))
3455returntrue;
3456
3457// See if we can simplify either vselect operand.
3458APInt DemandedLHS(DemandedElts);
3459APInt DemandedRHS(DemandedElts);
3460APInt UndefLHS, ZeroLHS;
3461APInt UndefRHS, ZeroRHS;
3462if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3463Depth + 1))
3464returntrue;
3465if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3466Depth + 1))
3467returntrue;
3468
3469 KnownUndef = UndefLHS & UndefRHS;
3470 KnownZero = ZeroLHS & ZeroRHS;
3471
3472// If we know that the selected element is always zero, we don't need the
3473// select value element.
3474APInt DemandedSel = DemandedElts & ~KnownZero;
3475if (DemandedSel != DemandedElts)
3476if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3477Depth + 1))
3478returntrue;
3479
3480break;
3481 }
3482caseISD::VECTOR_SHUFFLE: {
3483SDValueLHS =Op.getOperand(0);
3484SDValueRHS =Op.getOperand(1);
3485ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3486
3487// Collect demanded elements from shuffle operands..
3488APInt DemandedLHS(NumElts, 0);
3489APInt DemandedRHS(NumElts, 0);
3490for (unsigned i = 0; i != NumElts; ++i) {
3491int M = ShuffleMask[i];
3492if (M < 0 || !DemandedElts[i])
3493continue;
3494assert(0 <= M && M < (int)(2 * NumElts) &&"Shuffle index out of range");
3495if (M < (int)NumElts)
3496 DemandedLHS.setBit(M);
3497else
3498 DemandedRHS.setBit(M - NumElts);
3499 }
3500
3501// See if we can simplify either shuffle operand.
3502APInt UndefLHS, ZeroLHS;
3503APInt UndefRHS, ZeroRHS;
3504if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3505Depth + 1))
3506returntrue;
3507if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3508Depth + 1))
3509returntrue;
3510
3511// Simplify mask using undef elements from LHS/RHS.
3512bool Updated =false;
3513bool IdentityLHS =true, IdentityRHS =true;
3514SmallVector<int, 32> NewMask(ShuffleMask);
3515for (unsigned i = 0; i != NumElts; ++i) {
3516int &M = NewMask[i];
3517if (M < 0)
3518continue;
3519if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3520 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3521 Updated =true;
3522 M = -1;
3523 }
3524 IdentityLHS &= (M < 0) || (M == (int)i);
3525 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3526 }
3527
3528// Update legal shuffle masks based on demanded elements if it won't reduce
3529// to Identity which can cause premature removal of the shuffle mask.
3530if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3531SDValue LegalShuffle =
3532 buildLegalVectorShuffle(VT,DL,LHS,RHS, NewMask, TLO.DAG);
3533if (LegalShuffle)
3534return TLO.CombineTo(Op, LegalShuffle);
3535 }
3536
3537// Propagate undef/zero elements from LHS/RHS.
3538for (unsigned i = 0; i != NumElts; ++i) {
3539int M = ShuffleMask[i];
3540if (M < 0) {
3541 KnownUndef.setBit(i);
3542 }elseif (M < (int)NumElts) {
3543if (UndefLHS[M])
3544 KnownUndef.setBit(i);
3545if (ZeroLHS[M])
3546 KnownZero.setBit(i);
3547 }else {
3548if (UndefRHS[M - NumElts])
3549 KnownUndef.setBit(i);
3550if (ZeroRHS[M - NumElts])
3551 KnownZero.setBit(i);
3552 }
3553 }
3554break;
3555 }
3556caseISD::ANY_EXTEND_VECTOR_INREG:
3557caseISD::SIGN_EXTEND_VECTOR_INREG:
3558caseISD::ZERO_EXTEND_VECTOR_INREG: {
3559APInt SrcUndef, SrcZero;
3560SDValue Src =Op.getOperand(0);
3561unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3562APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3563if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3564Depth + 1))
3565returntrue;
3566 KnownZero = SrcZero.zextOrTrunc(NumElts);
3567 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3568
3569if (IsLE &&Op.getOpcode() ==ISD::ANY_EXTEND_VECTOR_INREG &&
3570Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3571 DemandedSrcElts == 1) {
3572// aext - if we just need the bottom element then we can bitcast.
3573return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3574 }
3575
3576if (Op.getOpcode() ==ISD::ZERO_EXTEND_VECTOR_INREG) {
3577// zext(undef) upper bits are guaranteed to be zero.
3578if (DemandedElts.isSubsetOf(KnownUndef))
3579return TLO.CombineTo(Op, TLO.DAG.getConstant(0,SDLoc(Op), VT));
3580 KnownUndef.clearAllBits();
3581
3582// zext - if we just need the bottom element then we can mask:
3583// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3584if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() ==ISD::AND &&
3585Op->isOnlyUserOf(Src.getNode()) &&
3586Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3587SDLocDL(Op);
3588EVT SrcVT = Src.getValueType();
3589EVT SrcSVT = SrcVT.getScalarType();
3590SmallVector<SDValue> MaskElts;
3591 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3592 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0,DL, SrcSVT));
3593SDValue Mask = TLO.DAG.getBuildVector(SrcVT,DL, MaskElts);
3594if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3595ISD::AND,DL, SrcVT, {Src.getOperand(1), Mask})) {
3596 Fold = TLO.DAG.getNode(ISD::AND,DL, SrcVT, Src.getOperand(0), Fold);
3597return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3598 }
3599 }
3600 }
3601break;
3602 }
3603
3604// TODO: There are more binop opcodes that could be handled here - MIN,
3605// MAX, saturated math, etc.
3606caseISD::ADD: {
3607SDValue Op0 =Op.getOperand(0);
3608SDValue Op1 =Op.getOperand(1);
3609if (Op0 == Op1 &&Op->isOnlyUserOf(Op0.getNode())) {
3610APInt UndefLHS, ZeroLHS;
3611if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3612Depth + 1,/*AssumeSingleUse*/true))
3613returntrue;
3614 }
3615 [[fallthrough]];
3616 }
3617caseISD::AVGCEILS:
3618caseISD::AVGCEILU:
3619caseISD::AVGFLOORS:
3620caseISD::AVGFLOORU:
3621caseISD::OR:
3622caseISD::XOR:
3623caseISD::SUB:
3624caseISD::FADD:
3625caseISD::FSUB:
3626caseISD::FMUL:
3627caseISD::FDIV:
3628caseISD::FREM: {
3629SDValue Op0 =Op.getOperand(0);
3630SDValue Op1 =Op.getOperand(1);
3631
3632APInt UndefRHS, ZeroRHS;
3633if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3634Depth + 1))
3635returntrue;
3636APInt UndefLHS, ZeroLHS;
3637if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3638Depth + 1))
3639returntrue;
3640
3641 KnownZero = ZeroLHS & ZeroRHS;
3642 KnownUndef =getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3643
3644// Attempt to avoid multi-use ops if we don't need anything from them.
3645// TODO - use KnownUndef to relax the demandedelts?
3646if (!DemandedElts.isAllOnes())
3647if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3648returntrue;
3649break;
3650 }
3651caseISD::SHL:
3652caseISD::SRL:
3653caseISD::SRA:
3654caseISD::ROTL:
3655caseISD::ROTR: {
3656SDValue Op0 =Op.getOperand(0);
3657SDValue Op1 =Op.getOperand(1);
3658
3659APInt UndefRHS, ZeroRHS;
3660if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3661Depth + 1))
3662returntrue;
3663APInt UndefLHS, ZeroLHS;
3664if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3665Depth + 1))
3666returntrue;
3667
3668 KnownZero = ZeroLHS;
3669 KnownUndef = UndefLHS & UndefRHS;// TODO: use getKnownUndefForVectorBinop?
3670
3671// Attempt to avoid multi-use ops if we don't need anything from them.
3672// TODO - use KnownUndef to relax the demandedelts?
3673if (!DemandedElts.isAllOnes())
3674if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3675returntrue;
3676break;
3677 }
3678caseISD::MUL:
3679caseISD::MULHU:
3680caseISD::MULHS:
3681caseISD::AND: {
3682SDValue Op0 =Op.getOperand(0);
3683SDValue Op1 =Op.getOperand(1);
3684
3685APInt SrcUndef, SrcZero;
3686if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3687Depth + 1))
3688returntrue;
3689// If we know that a demanded element was zero in Op1 we don't need to
3690// demand it in Op0 - its guaranteed to be zero.
3691APInt DemandedElts0 = DemandedElts & ~SrcZero;
3692if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3693 TLO,Depth + 1))
3694returntrue;
3695
3696 KnownUndef &= DemandedElts0;
3697 KnownZero &= DemandedElts0;
3698
3699// If every element pair has a zero/undef then just fold to zero.
3700// fold (and x, undef) -> 0 / (and x, 0) -> 0
3701// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3702if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3703return TLO.CombineTo(Op, TLO.DAG.getConstant(0,SDLoc(Op), VT));
3704
3705// If either side has a zero element, then the result element is zero, even
3706// if the other is an UNDEF.
3707// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3708// and then handle 'and' nodes with the rest of the binop opcodes.
3709 KnownZero |= SrcZero;
3710 KnownUndef &= SrcUndef;
3711 KnownUndef &= ~KnownZero;
3712
3713// Attempt to avoid multi-use ops if we don't need anything from them.
3714if (!DemandedElts.isAllOnes())
3715if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3716returntrue;
3717break;
3718 }
3719caseISD::TRUNCATE:
3720caseISD::SIGN_EXTEND:
3721caseISD::ZERO_EXTEND:
3722if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3723 KnownZero, TLO,Depth + 1))
3724returntrue;
3725
3726if (!DemandedElts.isAllOnes())
3727if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3728Op.getOperand(0), DemandedElts, TLO.DAG,Depth + 1))
3729return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode,SDLoc(Op), VT, NewOp));
3730
3731if (Op.getOpcode() ==ISD::ZERO_EXTEND) {
3732// zext(undef) upper bits are guaranteed to be zero.
3733if (DemandedElts.isSubsetOf(KnownUndef))
3734return TLO.CombineTo(Op, TLO.DAG.getConstant(0,SDLoc(Op), VT));
3735 KnownUndef.clearAllBits();
3736 }
3737break;
3738caseISD::SINT_TO_FP:
3739caseISD::UINT_TO_FP:
3740caseISD::FP_TO_SINT:
3741caseISD::FP_TO_UINT:
3742if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3743 KnownZero, TLO,Depth + 1))
3744returntrue;
3745// Don't fall through to generic undef -> undef handling.
3746returnfalse;
3747default: {
3748if (Op.getOpcode() >=ISD::BUILTIN_OP_END) {
3749if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3750 KnownZero, TLO,Depth))
3751returntrue;
3752 }else {
3753KnownBits Known;
3754APIntDemandedBits =APInt::getAllOnes(EltSizeInBits);
3755if (SimplifyDemandedBits(Op,DemandedBits, OriginalDemandedElts, Known,
3756 TLO,Depth, AssumeSingleUse))
3757returntrue;
3758 }
3759break;
3760 }
3761 }
3762assert((KnownUndef & KnownZero) == 0 &&"Elements flagged as undef AND zero");
3763
3764// Constant fold all undef cases.
3765// TODO: Handle zero cases as well.
3766if (DemandedElts.isSubsetOf(KnownUndef))
3767return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3768
3769returnfalse;
3770}
3771
3772/// Determine which of the bits specified in Mask are known to be either zero or
3773/// one and return them in the Known.
3774voidTargetLowering::computeKnownBitsForTargetNode(constSDValueOp,
3775KnownBits &Known,
3776constAPInt &DemandedElts,
3777constSelectionDAG &DAG,
3778unsignedDepth) const{
3779assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3780Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3781Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3782Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3783"Should use MaskedValueIsZero if you don't know whether Op"
3784" is a target node!");
3785 Known.resetAll();
3786}
3787
3788voidTargetLowering::computeKnownBitsForTargetInstr(
3789GISelKnownBits &Analysis,Register R,KnownBits &Known,
3790constAPInt &DemandedElts,constMachineRegisterInfo &MRI,
3791unsignedDepth) const{
3792 Known.resetAll();
3793}
3794
3795voidTargetLowering::computeKnownBitsForFrameIndex(
3796constint FrameIdx,KnownBits &Known,constMachineFunction &MF) const{
3797// The low bits are known zero if the pointer is aligned.
3798 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3799}
3800
3801AlignTargetLowering::computeKnownAlignForTargetInstr(
3802GISelKnownBits &Analysis,Register R,constMachineRegisterInfo &MRI,
3803unsignedDepth) const{
3804returnAlign(1);
3805}
3806
3807/// This method can be implemented by targets that want to expose additional
3808/// information about sign bits to the DAG Combiner.
3809unsignedTargetLowering::ComputeNumSignBitsForTargetNode(SDValueOp,
3810constAPInt &,
3811constSelectionDAG &,
3812unsignedDepth) const{
3813assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3814Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3815Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3816Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3817"Should use ComputeNumSignBits if you don't know whether Op"
3818" is a target node!");
3819return 1;
3820}
3821
3822unsignedTargetLowering::computeNumSignBitsForTargetInstr(
3823GISelKnownBits &Analysis,Register R,constAPInt &DemandedElts,
3824constMachineRegisterInfo &MRI,unsignedDepth) const{
3825return 1;
3826}
3827
3828boolTargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3829SDValueOp,constAPInt &DemandedElts,APInt &KnownUndef,APInt &KnownZero,
3830TargetLoweringOpt &TLO,unsignedDepth) const{
3831assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3832Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3833Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3834Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3835"Should use SimplifyDemandedVectorElts if you don't know whether Op"
3836" is a target node!");
3837returnfalse;
3838}
3839
3840boolTargetLowering::SimplifyDemandedBitsForTargetNode(
3841SDValueOp,constAPInt &DemandedBits,constAPInt &DemandedElts,
3842KnownBits &Known,TargetLoweringOpt &TLO,unsignedDepth) const{
3843assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3844Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3845Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3846Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3847"Should use SimplifyDemandedBits if you don't know whether Op"
3848" is a target node!");
3849 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG,Depth);
3850returnfalse;
3851}
3852
3853SDValueTargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3854SDValueOp,constAPInt &DemandedBits,constAPInt &DemandedElts,
3855SelectionDAG &DAG,unsignedDepth) const{
3856assert(
3857 (Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3858Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3859Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3860Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3861"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3862" is a target node!");
3863returnSDValue();
3864}
3865
3866SDValue
3867TargetLowering::buildLegalVectorShuffle(EVT VT,constSDLoc &DL,SDValue N0,
3868SDValue N1,MutableArrayRef<int> Mask,
3869SelectionDAG &DAG) const{
3870bool LegalMask =isShuffleMaskLegal(Mask, VT);
3871if (!LegalMask) {
3872std::swap(N0, N1);
3873ShuffleVectorSDNode::commuteMask(Mask);
3874 LegalMask =isShuffleMaskLegal(Mask, VT);
3875 }
3876
3877if (!LegalMask)
3878returnSDValue();
3879
3880return DAG.getVectorShuffle(VT,DL, N0, N1, Mask);
3881}
3882
3883constConstant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const{
3884returnnullptr;
3885}
3886
3887boolTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3888SDValueOp,constAPInt &DemandedElts,constSelectionDAG &DAG,
3889boolPoisonOnly,unsignedDepth) const{
3890assert(
3891 (Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3892Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3893Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3894Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3895"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3896" is a target node!");
3897
3898// If Op can't create undef/poison and none of its operands are undef/poison
3899// then Op is never undef/poison.
3900return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG,PoisonOnly,
3901/*ConsiderFlags*/true,Depth) &&
3902all_of(Op->ops(), [&](SDValue V) {
3903 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3904 Depth + 1);
3905 });
3906}
3907
3908boolTargetLowering::canCreateUndefOrPoisonForTargetNode(
3909SDValueOp,constAPInt &DemandedElts,constSelectionDAG &DAG,
3910boolPoisonOnly,bool ConsiderFlags,unsignedDepth) const{
3911assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3912Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3913Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3914Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3915"Should use canCreateUndefOrPoison if you don't know whether Op"
3916" is a target node!");
3917// Be conservative and return true.
3918returntrue;
3919}
3920
3921boolTargetLowering::isKnownNeverNaNForTargetNode(SDValueOp,
3922constSelectionDAG &DAG,
3923bool SNaN,
3924unsignedDepth) const{
3925assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3926Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3927Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3928Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3929"Should use isKnownNeverNaN if you don't know whether Op"
3930" is a target node!");
3931returnfalse;
3932}
3933
3934boolTargetLowering::isSplatValueForTargetNode(SDValueOp,
3935constAPInt &DemandedElts,
3936APInt &UndefElts,
3937constSelectionDAG &DAG,
3938unsignedDepth) const{
3939assert((Op.getOpcode() >=ISD::BUILTIN_OP_END ||
3940Op.getOpcode() ==ISD::INTRINSIC_WO_CHAIN ||
3941Op.getOpcode() ==ISD::INTRINSIC_W_CHAIN ||
3942Op.getOpcode() ==ISD::INTRINSIC_VOID) &&
3943"Should use isSplatValue if you don't know whether Op"
3944" is a target node!");
3945returnfalse;
3946}
3947
3948// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3949// work with truncating build vectors and vectors with elements of less than
3950// 8 bits.
3951boolTargetLowering::isConstTrueVal(SDValueN) const{
3952if (!N)
3953returnfalse;
3954
3955unsigned EltWidth;
3956APInt CVal;
3957if (ConstantSDNode *CN =isConstOrConstSplat(N,/*AllowUndefs=*/false,
3958/*AllowTruncation=*/true)) {
3959 CVal = CN->getAPIntValue();
3960 EltWidth =N.getValueType().getScalarSizeInBits();
3961 }else
3962returnfalse;
3963
3964// If this is a truncating splat, truncate the splat value.
3965// Otherwise, we may fail to match the expected values below.
3966if (EltWidth < CVal.getBitWidth())
3967 CVal = CVal.trunc(EltWidth);
3968
3969switch (getBooleanContents(N.getValueType())) {
3970caseUndefinedBooleanContent:
3971return CVal[0];
3972caseZeroOrOneBooleanContent:
3973return CVal.isOne();
3974caseZeroOrNegativeOneBooleanContent:
3975return CVal.isAllOnes();
3976 }
3977
3978llvm_unreachable("Invalid boolean contents");
3979}
3980
3981boolTargetLowering::isConstFalseVal(SDValueN) const{
3982if (!N)
3983returnfalse;
3984
3985constConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3986if (!CN) {
3987constBuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3988if (!BV)
3989returnfalse;
3990
3991// Only interested in constant splats, we don't care about undef
3992// elements in identifying boolean constants and getConstantSplatNode
3993// returns NULL if all ops are undef;
3994 CN = BV->getConstantSplatNode();
3995if (!CN)
3996returnfalse;
3997 }
3998
3999if (getBooleanContents(N->getValueType(0)) ==UndefinedBooleanContent)
4000return !CN->getAPIntValue()[0];
4001
4002return CN->isZero();
4003}
4004
4005boolTargetLowering::isExtendedTrueVal(constConstantSDNode *N,EVT VT,
4006bool SExt) const{
4007if (VT == MVT::i1)
4008returnN->isOne();
4009
4010TargetLowering::BooleanContent Cnt =getBooleanContents(VT);
4011switch (Cnt) {
4012caseTargetLowering::ZeroOrOneBooleanContent:
4013// An extended value of 1 is always true, unless its original type is i1,
4014// in which case it will be sign extended to -1.
4015return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4016caseTargetLowering::UndefinedBooleanContent:
4017caseTargetLowering::ZeroOrNegativeOneBooleanContent:
4018returnN->isAllOnes() && SExt;
4019 }
4020llvm_unreachable("Unexpected enumeration.");
4021}
4022
4023/// This helper function of SimplifySetCC tries to optimize the comparison when
4024/// either operand of the SetCC node is a bitwise-and instruction.
4025SDValue TargetLowering::foldSetCCWithAnd(EVT VT,SDValue N0,SDValue N1,
4026ISD::CondCodeCond,constSDLoc &DL,
4027 DAGCombinerInfo &DCI) const{
4028if (N1.getOpcode() ==ISD::AND && N0.getOpcode() !=ISD::AND)
4029std::swap(N0, N1);
4030
4031SelectionDAG &DAG = DCI.DAG;
4032EVT OpVT = N0.getValueType();
4033if (N0.getOpcode() !=ISD::AND || !OpVT.isInteger() ||
4034 (Cond !=ISD::SETEQ &&Cond !=ISD::SETNE))
4035returnSDValue();
4036
4037// (X & Y) != 0 --> zextOrTrunc(X & Y)
4038// iff everything but LSB is known zero:
4039if (Cond ==ISD::SETNE &&isNullConstant(N1) &&
4040 (getBooleanContents(OpVT) ==TargetLowering::UndefinedBooleanContent ||
4041getBooleanContents(OpVT) ==TargetLowering::ZeroOrOneBooleanContent)) {
4042unsigned NumEltBits = OpVT.getScalarSizeInBits();
4043APInt UpperBits =APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4044if (DAG.MaskedValueIsZero(N0, UpperBits))
4045return DAG.getBoolExtOrTrunc(N0,DL, VT, OpVT);
4046 }
4047
4048// Try to eliminate a power-of-2 mask constant by converting to a signbit
4049// test in a narrow type that we can truncate to with no cost. Examples:
4050// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4051// (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4052// TODO: This conservatively checks for type legality on the source and
4053// destination types. That may inhibit optimizations, but it also
4054// allows setcc->shift transforms that may be more beneficial.
4055auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4056if (AndC &&isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4057isTypeLegal(OpVT) && N0.hasOneUse()) {
4058EVT NarrowVT =EVT::getIntegerVT(*DAG.getContext(),
4059 AndC->getAPIntValue().getActiveBits());
4060if (isTruncateFree(OpVT, NarrowVT) &&isTypeLegal(NarrowVT)) {
4061SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0),DL, NarrowVT);
4062SDValue Zero = DAG.getConstant(0,DL, NarrowVT);
4063return DAG.getSetCC(DL, VT, Trunc, Zero,
4064Cond ==ISD::SETEQ ?ISD::SETGE :ISD::SETLT);
4065 }
4066 }
4067
4068// Match these patterns in any of their permutations:
4069// (X & Y) == Y
4070// (X & Y) != Y
4071SDValueX,Y;
4072if (N0.getOperand(0) == N1) {
4073X = N0.getOperand(1);
4074Y = N0.getOperand(0);
4075 }elseif (N0.getOperand(1) == N1) {
4076X = N0.getOperand(0);
4077Y = N0.getOperand(1);
4078 }else {
4079returnSDValue();
4080 }
4081
4082// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4083// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4084// its liable to create and infinite loop.
4085SDValueZero = DAG.getConstant(0,DL, OpVT);
4086if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4087 DAG.isKnownToBeAPowerOfTwo(Y)) {
4088// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4089// Note that where Y is variable and is known to have at most one bit set
4090// (for example, if it is Z & 1) we cannot do this; the expressions are not
4091// equivalent when Y == 0.
4092assert(OpVT.isInteger());
4093Cond =ISD::getSetCCInverse(Cond, OpVT);
4094if (DCI.isBeforeLegalizeOps() ||
4095isCondCodeLegal(Cond, N0.getSimpleValueType()))
4096return DAG.getSetCC(DL, VT, N0, Zero,Cond);
4097 }elseif (N0.hasOneUse() &&hasAndNotCompare(Y)) {
4098// If the target supports an 'and-not' or 'and-complement' logic operation,
4099// try to use that to make a comparison operation more efficient.
4100// But don't do this transform if the mask is a single bit because there are
4101// more efficient ways to deal with that case (for example, 'bt' on x86 or
4102// 'rlwinm' on PPC).
4103
4104// Bail out if the compare operand that we want to turn into a zero is
4105// already a zero (otherwise, infinite loop).
4106if (isNullConstant(Y))
4107returnSDValue();
4108
4109// Transform this into: ~X & Y == 0.
4110SDValue NotX = DAG.getNOT(SDLoc(X),X, OpVT);
4111SDValue NewAnd = DAG.getNode(ISD::AND,SDLoc(N0), OpVT, NotX,Y);
4112return DAG.getSetCC(DL, VT, NewAnd, Zero,Cond);
4113 }
4114
4115returnSDValue();
4116}
4117
4118/// There are multiple IR patterns that could be checking whether certain
4119/// truncation of a signed number would be lossy or not. The pattern which is
4120/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4121/// We are looking for the following pattern: (KeptBits is a constant)
4122/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4123/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4124/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4125/// We will unfold it into the natural trunc+sext pattern:
4126/// ((%x << C) a>> C) dstcond %x
4127/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4128SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4129EVT SCCVT,SDValue N0,SDValue N1,ISD::CondCodeCond, DAGCombinerInfo &DCI,
4130constSDLoc &DL) const{
4131// We must be comparing with a constant.
4132ConstantSDNode *C1;
4133if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4134returnSDValue();
4135
4136// N0 should be: add %x, (1 << (KeptBits-1))
4137if (N0->getOpcode() !=ISD::ADD)
4138returnSDValue();
4139
4140// And we must be 'add'ing a constant.
4141ConstantSDNode *C01;
4142if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4143returnSDValue();
4144
4145SDValueX = N0->getOperand(0);
4146EVT XVT =X.getValueType();
4147
4148// Validate constants ...
4149
4150APIntI1 = C1->getAPIntValue();
4151
4152ISD::CondCode NewCond;
4153if (Cond ==ISD::CondCode::SETULT) {
4154 NewCond =ISD::CondCode::SETEQ;
4155 }elseif (Cond ==ISD::CondCode::SETULE) {
4156 NewCond =ISD::CondCode::SETEQ;
4157// But need to 'canonicalize' the constant.
4158I1 += 1;
4159 }elseif (Cond ==ISD::CondCode::SETUGT) {
4160 NewCond =ISD::CondCode::SETNE;
4161// But need to 'canonicalize' the constant.
4162I1 += 1;
4163 }elseif (Cond ==ISD::CondCode::SETUGE) {
4164 NewCond =ISD::CondCode::SETNE;
4165 }else
4166returnSDValue();
4167
4168APInt I01 = C01->getAPIntValue();
4169
4170auto checkConstants = [&I1, &I01]() ->bool {
4171// Both of them must be power-of-two, and the constant from setcc is bigger.
4172returnI1.ugt(I01) &&I1.isPowerOf2() && I01.isPowerOf2();
4173 };
4174
4175if (checkConstants()) {
4176// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4177 }else {
4178// What if we invert constants? (and the target predicate)
4179I1.negate();
4180 I01.negate();
4181assert(XVT.isInteger());
4182 NewCond =getSetCCInverse(NewCond, XVT);
4183if (!checkConstants())
4184returnSDValue();
4185// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4186 }
4187
4188// They are power-of-two, so which bit is set?
4189constunsigned KeptBits =I1.logBase2();
4190constunsigned KeptBitsMinusOne = I01.logBase2();
4191
4192// Magic!
4193if (KeptBits != (KeptBitsMinusOne + 1))
4194returnSDValue();
4195assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() &&"unreachable");
4196
4197// We don't want to do this in every single case.
4198SelectionDAG &DAG = DCI.DAG;
4199if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4200returnSDValue();
4201
4202// Unfold into: sext_inreg(%x) cond %x
4203// Where 'cond' will be either 'eq' or 'ne'.
4204SDValue SExtInReg = DAG.getNode(
4205ISD::SIGN_EXTEND_INREG,DL, XVT,X,
4206 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4207return DAG.getSetCC(DL, SCCVT, SExtInReg,X, NewCond);
4208}
4209
4210// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4211SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4212EVT SCCVT,SDValue N0,SDValue N1C,ISD::CondCodeCond,
4213 DAGCombinerInfo &DCI,constSDLoc &DL) const{
4214assert(isConstOrConstSplat(N1C) &&isConstOrConstSplat(N1C)->isZero() &&
4215"Should be a comparison with 0.");
4216assert((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
4217"Valid only for [in]equality comparisons.");
4218
4219unsigned NewShiftOpcode;
4220SDValueX,C,Y;
4221
4222SelectionDAG &DAG = DCI.DAG;
4223
4224// Look for '(C l>>/<< Y)'.
4225autoMatch = [&NewShiftOpcode, &X, &C, &Y, &DAG,this](SDValueV) {
4226// The shift should be one-use.
4227if (!V.hasOneUse())
4228returnfalse;
4229unsigned OldShiftOpcode =V.getOpcode();
4230switch (OldShiftOpcode) {
4231caseISD::SHL:
4232 NewShiftOpcode =ISD::SRL;
4233break;
4234caseISD::SRL:
4235 NewShiftOpcode =ISD::SHL;
4236break;
4237default:
4238returnfalse;// must be a logical shift.
4239 }
4240// We should be shifting a constant.
4241// FIXME: best to use isConstantOrConstantVector().
4242C =V.getOperand(0);
4243ConstantSDNode *CC =
4244isConstOrConstSplat(C,/*AllowUndefs=*/true,/*AllowTruncation=*/true);
4245if (!CC)
4246returnfalse;
4247Y =V.getOperand(1);
4248
4249ConstantSDNode *XC =
4250isConstOrConstSplat(X,/*AllowUndefs=*/true,/*AllowTruncation=*/true);
4251returnshouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4252X, XC,CC,Y, OldShiftOpcode, NewShiftOpcode, DAG);
4253 };
4254
4255// LHS of comparison should be an one-use 'and'.
4256if (N0.getOpcode() !=ISD::AND || !N0.hasOneUse())
4257returnSDValue();
4258
4259X = N0.getOperand(0);
4260SDValueMask = N0.getOperand(1);
4261
4262// 'and' is commutative!
4263if (!Match(Mask)) {
4264std::swap(X, Mask);
4265if (!Match(Mask))
4266returnSDValue();
4267 }
4268
4269EVT VT =X.getValueType();
4270
4271// Produce:
4272// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4273SDValue T0 = DAG.getNode(NewShiftOpcode,DL, VT,X,Y);
4274SDValueT1 = DAG.getNode(ISD::AND,DL, VT, T0,C);
4275SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C,Cond);
4276return T2;
4277}
4278
4279/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4280/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4281/// handle the commuted versions of these patterns.
4282SDValue TargetLowering::foldSetCCWithBinOp(EVT VT,SDValue N0,SDValue N1,
4283ISD::CondCodeCond,constSDLoc &DL,
4284 DAGCombinerInfo &DCI) const{
4285unsigned BOpcode = N0.getOpcode();
4286assert((BOpcode ==ISD::ADD || BOpcode ==ISD::SUB || BOpcode ==ISD::XOR) &&
4287"Unexpected binop");
4288assert((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&"Unexpected condcode");
4289
4290// (X + Y) == X --> Y == 0
4291// (X - Y) == X --> Y == 0
4292// (X ^ Y) == X --> Y == 0
4293SelectionDAG &DAG = DCI.DAG;
4294EVT OpVT = N0.getValueType();
4295SDValueX = N0.getOperand(0);
4296SDValueY = N0.getOperand(1);
4297if (X == N1)
4298return DAG.getSetCC(DL, VT,Y, DAG.getConstant(0,DL, OpVT),Cond);
4299
4300if (Y != N1)
4301returnSDValue();
4302
4303// (X + Y) == Y --> X == 0
4304// (X ^ Y) == Y --> X == 0
4305if (BOpcode ==ISD::ADD || BOpcode ==ISD::XOR)
4306return DAG.getSetCC(DL, VT,X, DAG.getConstant(0,DL, OpVT),Cond);
4307
4308// The shift would not be valid if the operands are boolean (i1).
4309if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4310returnSDValue();
4311
4312// (X - Y) == Y --> X == Y << 1
4313SDValue One = DAG.getShiftAmountConstant(1, OpVT,DL);
4314SDValue YShl1 = DAG.getNode(ISD::SHL,DL, N1.getValueType(),Y, One);
4315if (!DCI.isCalledByLegalizer())
4316 DCI.AddToWorklist(YShl1.getNode());
4317return DAG.getSetCC(DL, VT,X, YShl1,Cond);
4318}
4319
4320staticSDValuesimplifySetCCWithCTPOP(constTargetLowering &TLI,EVT VT,
4321SDValue N0,constAPInt &C1,
4322ISD::CondCodeCond,constSDLoc &dl,
4323SelectionDAG &DAG) {
4324// Look through truncs that don't change the value of a ctpop.
4325// FIXME: Add vector support? Need to be careful with setcc result type below.
4326SDValue CTPOP = N0;
4327if (N0.getOpcode() ==ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4328 N0.getScalarValueSizeInBits() >Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4329 CTPOP = N0.getOperand(0);
4330
4331if (CTPOP.getOpcode() !=ISD::CTPOP || !CTPOP.hasOneUse())
4332returnSDValue();
4333
4334EVT CTVT = CTPOP.getValueType();
4335SDValue CTOp = CTPOP.getOperand(0);
4336
4337// Expand a power-of-2-or-zero comparison based on ctpop:
4338// (ctpop x) u< 2 -> (x & x-1) == 0
4339// (ctpop x) u> 1 -> (x & x-1) != 0
4340if (Cond ==ISD::SETULT ||Cond ==ISD::SETUGT) {
4341// Keep the CTPOP if it is a cheap vector op.
4342if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4343returnSDValue();
4344
4345unsigned CostLimit = TLI.getCustomCtpopCost(CTVT,Cond);
4346if (C1.ugt(CostLimit + (Cond ==ISD::SETULT)))
4347returnSDValue();
4348if (C1 == 0 && (Cond ==ISD::SETULT))
4349returnSDValue();// This is handled elsewhere.
4350
4351unsignedPasses = C1.getLimitedValue() - (Cond ==ISD::SETULT);
4352
4353SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4354SDValue Result = CTOp;
4355for (unsigned i = 0; i <Passes; i++) {
4356SDValueAdd = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4357 Result = DAG.getNode(ISD::AND, dl, CTVT, Result,Add);
4358 }
4359ISD::CondCodeCC =Cond ==ISD::SETULT ?ISD::SETEQ :ISD::SETNE;
4360return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT),CC);
4361 }
4362
4363// Expand a power-of-2 comparison based on ctpop
4364if ((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) && C1 == 1) {
4365// Keep the CTPOP if it is cheap.
4366if (TLI.isCtpopFast(CTVT))
4367returnSDValue();
4368
4369SDValue Zero = DAG.getConstant(0, dl, CTVT);
4370SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4371assert(CTVT.isInteger());
4372SDValueAdd = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4373
4374// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4375// check before emitting a potentially unnecessary op.
4376if (DAG.isKnownNeverZero(CTOp)) {
4377// (ctpop x) == 1 --> (x & x-1) == 0
4378// (ctpop x) != 1 --> (x & x-1) != 0
4379SDValueAnd = DAG.getNode(ISD::AND, dl, CTVT, CTOp,Add);
4380SDValueRHS = DAG.getSetCC(dl, VT,And, Zero,Cond);
4381returnRHS;
4382 }
4383
4384// (ctpop x) == 1 --> (x ^ x-1) > x-1
4385// (ctpop x) != 1 --> (x ^ x-1) <= x-1
4386SDValueXor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp,Add);
4387ISD::CondCode CmpCond =Cond ==ISD::SETEQ ?ISD::SETUGT :ISD::SETULE;
4388return DAG.getSetCC(dl, VT,Xor,Add, CmpCond);
4389 }
4390
4391returnSDValue();
4392}
4393
4394staticSDValuefoldSetCCWithRotate(EVT VT,SDValue N0,SDValue N1,
4395ISD::CondCodeCond,constSDLoc &dl,
4396SelectionDAG &DAG) {
4397if (Cond !=ISD::SETEQ &&Cond !=ISD::SETNE)
4398returnSDValue();
4399
4400auto *C1 =isConstOrConstSplat(N1,/* AllowUndefs */true);
4401if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4402returnSDValue();
4403
4404auto getRotateSource = [](SDValueX) {
4405if (X.getOpcode() ==ISD::ROTL ||X.getOpcode() ==ISD::ROTR)
4406returnX.getOperand(0);
4407returnSDValue();
4408 };
4409
4410// Peek through a rotated value compared against 0 or -1:
4411// (rot X, Y) == 0/-1 --> X == 0/-1
4412// (rot X, Y) != 0/-1 --> X != 0/-1
4413if (SDValue R = getRotateSource(N0))
4414return DAG.getSetCC(dl, VT, R, N1,Cond);
4415
4416// Peek through an 'or' of a rotated value compared against 0:
4417// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4418// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4419//
4420// TODO: Add the 'and' with -1 sibling.
4421// TODO: Recurse through a series of 'or' ops to find the rotate.
4422EVT OpVT = N0.getValueType();
4423if (N0.hasOneUse() && N0.getOpcode() ==ISD::OR && C1->isZero()) {
4424if (SDValue R = getRotateSource(N0.getOperand(0))) {
4425SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4426return DAG.getSetCC(dl, VT, NewOr, N1,Cond);
4427 }
4428if (SDValue R = getRotateSource(N0.getOperand(1))) {
4429SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4430return DAG.getSetCC(dl, VT, NewOr, N1,Cond);
4431 }
4432 }
4433
4434returnSDValue();
4435}
4436
4437staticSDValuefoldSetCCWithFunnelShift(EVT VT,SDValue N0,SDValue N1,
4438ISD::CondCodeCond,constSDLoc &dl,
4439SelectionDAG &DAG) {
4440// If we are testing for all-bits-clear, we might be able to do that with
4441// less shifting since bit-order does not matter.
4442if (Cond !=ISD::SETEQ &&Cond !=ISD::SETNE)
4443returnSDValue();
4444
4445auto *C1 =isConstOrConstSplat(N1,/* AllowUndefs */true);
4446if (!C1 || !C1->isZero())
4447returnSDValue();
4448
4449if (!N0.hasOneUse() ||
4450 (N0.getOpcode() !=ISD::FSHL && N0.getOpcode() !=ISD::FSHR))
4451returnSDValue();
4452
4453unsignedBitWidth = N0.getScalarValueSizeInBits();
4454auto *ShAmtC =isConstOrConstSplat(N0.getOperand(2));
4455if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4456returnSDValue();
4457
4458// Canonicalize fshr as fshl to reduce pattern-matching.
4459unsigned ShAmt = ShAmtC->getZExtValue();
4460if (N0.getOpcode() ==ISD::FSHR)
4461 ShAmt =BitWidth - ShAmt;
4462
4463// Match an 'or' with a specific operand 'Other' in either commuted variant.
4464SDValueX,Y;
4465auto matchOr = [&X, &Y](SDValueOr,SDValueOther) {
4466if (Or.getOpcode() !=ISD::OR || !Or.hasOneUse())
4467returnfalse;
4468if (Or.getOperand(0) ==Other) {
4469X =Or.getOperand(0);
4470Y =Or.getOperand(1);
4471returntrue;
4472 }
4473if (Or.getOperand(1) ==Other) {
4474X =Or.getOperand(1);
4475Y =Or.getOperand(0);
4476returntrue;
4477 }
4478returnfalse;
4479 };
4480
4481EVT OpVT = N0.getValueType();
4482EVT ShAmtVT = N0.getOperand(2).getValueType();
4483SDValue F0 = N0.getOperand(0);
4484SDValue F1 = N0.getOperand(1);
4485if (matchOr(F0, F1)) {
4486// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4487SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4488SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT,Y, NewShAmt);
4489SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift,X);
4490return DAG.getSetCC(dl, VT, NewOr, N1,Cond);
4491 }
4492if (matchOr(F1, F0)) {
4493// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4494SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4495SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT,Y, NewShAmt);
4496SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift,X);
4497return DAG.getSetCC(dl, VT, NewOr, N1,Cond);
4498 }
4499
4500returnSDValue();
4501}
4502
4503/// Try to simplify a setcc built with the specified operands and cc. If it is
4504/// unable to simplify it, return a null SDValue.
4505SDValueTargetLowering::SimplifySetCC(EVT VT,SDValue N0,SDValue N1,
4506ISD::CondCodeCond,bool foldBooleans,
4507DAGCombinerInfo &DCI,
4508constSDLoc &dl) const{
4509SelectionDAG &DAG = DCI.DAG;
4510constDataLayout &Layout = DAG.getDataLayout();
4511EVT OpVT = N0.getValueType();
4512AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4513
4514// Constant fold or commute setcc.
4515if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1,Cond, dl))
4516return Fold;
4517
4518bool N0ConstOrSplat =
4519isConstOrConstSplat(N0,/*AllowUndefs*/false,/*AllowTruncate*/true);
4520bool N1ConstOrSplat =
4521isConstOrConstSplat(N1,/*AllowUndefs*/false,/*AllowTruncate*/true);
4522
4523// Canonicalize toward having the constant on the RHS.
4524// TODO: Handle non-splat vector constants. All undef causes trouble.
4525// FIXME: We can't yet fold constant scalable vector splats, so avoid an
4526// infinite loop here when we encounter one.
4527ISD::CondCode SwappedCC =ISD::getSetCCSwappedOperands(Cond);
4528if (N0ConstOrSplat && !N1ConstOrSplat &&
4529 (DCI.isBeforeLegalizeOps() ||
4530isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4531return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4532
4533// If we have a subtract with the same 2 non-constant operands as this setcc
4534// -- but in reverse order -- then try to commute the operands of this setcc
4535// to match. A matching pair of setcc (cmp) and sub may be combined into 1
4536// instruction on some targets.
4537if (!N0ConstOrSplat && !N1ConstOrSplat &&
4538 (DCI.isBeforeLegalizeOps() ||
4539isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4540 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4541 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4542return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4543
4544if (SDValue V =foldSetCCWithRotate(VT, N0, N1,Cond, dl, DAG))
4545return V;
4546
4547if (SDValue V =foldSetCCWithFunnelShift(VT, N0, N1,Cond, dl, DAG))
4548return V;
4549
4550if (auto *N1C =isConstOrConstSplat(N1)) {
4551constAPInt &C1 = N1C->getAPIntValue();
4552
4553// Optimize some CTPOP cases.
4554if (SDValue V =simplifySetCCWithCTPOP(*this, VT, N0, C1,Cond, dl, DAG))
4555return V;
4556
4557// For equality to 0 of a no-wrap multiply, decompose and test each op:
4558// X * Y == 0 --> (X == 0) || (Y == 0)
4559// X * Y != 0 --> (X != 0) && (Y != 0)
4560// TODO: This bails out if minsize is set, but if the target doesn't have a
4561// single instruction multiply for this type, it would likely be
4562// smaller to decompose.
4563if (C1.isZero() && (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
4564 N0.getOpcode() ==ISD::MUL && N0.hasOneUse() &&
4565 (N0->getFlags().hasNoUnsignedWrap() ||
4566 N0->getFlags().hasNoSignedWrap()) &&
4567 !Attr.hasFnAttr(Attribute::MinSize)) {
4568SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1,Cond);
4569SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1,Cond);
4570unsigned LogicOp =Cond ==ISD::SETEQ ?ISD::OR :ISD::AND;
4571return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4572 }
4573
4574// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4575// equality comparison, then we're just comparing whether X itself is
4576// zero.
4577if (N0.getOpcode() ==ISD::SRL && (C1.isZero() || C1.isOne()) &&
4578 N0.getOperand(0).getOpcode() ==ISD::CTLZ &&
4579 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4580if (ConstantSDNode *ShAmt =isConstOrConstSplat(N0.getOperand(1))) {
4581if ((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
4582 ShAmt->getAPIntValue() ==Log2_32(N0.getScalarValueSizeInBits())) {
4583if ((C1 == 0) == (Cond ==ISD::SETEQ)) {
4584// (srl (ctlz x), 5) == 0 -> X != 0
4585// (srl (ctlz x), 5) != 1 -> X != 0
4586Cond =ISD::SETNE;
4587 }else {
4588// (srl (ctlz x), 5) != 0 -> X == 0
4589// (srl (ctlz x), 5) == 1 -> X == 0
4590Cond =ISD::SETEQ;
4591 }
4592SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4593return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4594Cond);
4595 }
4596 }
4597 }
4598 }
4599
4600// FIXME: Support vectors.
4601if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4602constAPInt &C1 = N1C->getAPIntValue();
4603
4604// (zext x) == C --> x == (trunc C)
4605// (sext x) == C --> x == (trunc C)
4606if ((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
4607 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4608unsigned MinBits = N0.getValueSizeInBits();
4609SDValue PreExt;
4610boolSigned =false;
4611if (N0->getOpcode() ==ISD::ZERO_EXTEND) {
4612// ZExt
4613 MinBits = N0->getOperand(0).getValueSizeInBits();
4614 PreExt = N0->getOperand(0);
4615 }elseif (N0->getOpcode() ==ISD::AND) {
4616// DAGCombine turns costly ZExts into ANDs
4617if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4618if ((C->getAPIntValue()+1).isPowerOf2()) {
4619 MinBits =C->getAPIntValue().countr_one();
4620 PreExt = N0->getOperand(0);
4621 }
4622 }elseif (N0->getOpcode() ==ISD::SIGN_EXTEND) {
4623// SExt
4624 MinBits = N0->getOperand(0).getValueSizeInBits();
4625 PreExt = N0->getOperand(0);
4626Signed =true;
4627 }elseif (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4628// ZEXTLOAD / SEXTLOAD
4629if (LN0->getExtensionType() ==ISD::ZEXTLOAD) {
4630 MinBits = LN0->getMemoryVT().getSizeInBits();
4631 PreExt = N0;
4632 }elseif (LN0->getExtensionType() ==ISD::SEXTLOAD) {
4633Signed =true;
4634 MinBits = LN0->getMemoryVT().getSizeInBits();
4635 PreExt = N0;
4636 }
4637 }
4638
4639// Figure out how many bits we need to preserve this constant.
4640unsigned ReqdBits =Signed ? C1.getSignificantBits() : C1.getActiveBits();
4641
4642// Make sure we're not losing bits from the constant.
4643if (MinBits > 0 &&
4644 MinBits < C1.getBitWidth() &&
4645 MinBits >= ReqdBits) {
4646EVT MinVT =EVT::getIntegerVT(*DAG.getContext(), MinBits);
4647if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4648// Will get folded away.
4649SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4650if (MinBits == 1 && C1 == 1)
4651// Invert the condition.
4652return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4653Cond ==ISD::SETEQ ?ISD::SETNE :ISD::SETEQ);
4654SDValueC = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4655return DAG.getSetCC(dl, VT, Trunc,C,Cond);
4656 }
4657
4658// If truncating the setcc operands is not desirable, we can still
4659// simplify the expression in some cases:
4660// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4661// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4662// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4663// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4664// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4665// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4666SDValue TopSetCC = N0->getOperand(0);
4667unsigned N0Opc = N0->getOpcode();
4668bool SExt = (N0Opc ==ISD::SIGN_EXTEND);
4669if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4670 TopSetCC.getOpcode() ==ISD::SETCC &&
4671 (N0Opc ==ISD::ZERO_EXTEND || N0Opc ==ISD::SIGN_EXTEND) &&
4672 (isConstFalseVal(N1) ||
4673 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4674
4675boolInverse = (N1C->isZero() &&Cond ==ISD::SETEQ) ||
4676 (!N1C->isZero() &&Cond ==ISD::SETNE);
4677
4678if (!Inverse)
4679return TopSetCC;
4680
4681ISD::CondCode InvCond =ISD::getSetCCInverse(
4682 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4683 TopSetCC.getOperand(0).getValueType());
4684return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4685 TopSetCC.getOperand(1),
4686 InvCond);
4687 }
4688 }
4689 }
4690
4691// If the LHS is '(and load, const)', the RHS is 0, the test is for
4692// equality or unsigned, and all 1 bits of the const are in the same
4693// partial word, see if we can shorten the load.
4694if (DCI.isBeforeLegalize() &&
4695 !ISD::isSignedIntSetCC(Cond) &&
4696 N0.getOpcode() ==ISD::AND && C1 == 0 &&
4697 N0.getNode()->hasOneUse() &&
4698 isa<LoadSDNode>(N0.getOperand(0)) &&
4699 N0.getOperand(0).getNode()->hasOneUse() &&
4700 isa<ConstantSDNode>(N0.getOperand(1))) {
4701auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4702APInt bestMask;
4703unsigned bestWidth = 0, bestOffset = 0;
4704if (Lod->isSimple() && Lod->isUnindexed() &&
4705 (Lod->getMemoryVT().isByteSized() ||
4706isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4707unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4708unsigned origWidth = N0.getValueSizeInBits();
4709unsigned maskWidth = origWidth;
4710// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4711// 8 bits, but have to be careful...
4712if (Lod->getExtensionType() !=ISD::NON_EXTLOAD)
4713 origWidth = Lod->getMemoryVT().getSizeInBits();
4714constAPInt &Mask = N0.getConstantOperandAPInt(1);
4715// Only consider power-of-2 widths (and at least one byte) as candiates
4716// for the narrowed load.
4717for (unsigned width = 8; width < origWidth; width *= 2) {
4718EVT newVT =EVT::getIntegerVT(*DAG.getContext(), width);
4719if (!shouldReduceLoadWidth(Lod,ISD::NON_EXTLOAD, newVT))
4720continue;
4721APInt newMask =APInt::getLowBitsSet(maskWidth, width);
4722// Avoid accessing any padding here for now (we could use memWidth
4723// instead of origWidth here otherwise).
4724unsigned maxOffset = origWidth - width;
4725for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4726if (Mask.isSubsetOf(newMask)) {
4727unsigned ptrOffset =
4728 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4729unsigned IsFast = 0;
4730Align NewAlign =commonAlignment(Lod->getAlign(), ptrOffset / 8);
4731if (allowsMemoryAccess(
4732 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4733 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4734 IsFast) {
4735 bestOffset = ptrOffset / 8;
4736 bestMask = Mask.lshr(offset);
4737 bestWidth = width;
4738break;
4739 }
4740 }
4741 newMask <<= 8;
4742 }
4743if (bestWidth)
4744break;
4745 }
4746 }
4747if (bestWidth) {
4748EVT newVT =EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4749SDValuePtr = Lod->getBasePtr();
4750if (bestOffset != 0)
4751Ptr = DAG.getObjectPtrOffset(dl,Ptr,TypeSize::getFixed(bestOffset));
4752SDValue NewLoad =
4753 DAG.getLoad(newVT, dl, Lod->getChain(),Ptr,
4754 Lod->getPointerInfo().getWithOffset(bestOffset),
4755 Lod->getOriginalAlign());
4756SDValueAnd =
4757 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4758 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4759return DAG.getSetCC(dl, VT,And, DAG.getConstant(0LL, dl, newVT),Cond);
4760 }
4761 }
4762
4763// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4764if (N0.getOpcode() ==ISD::ZERO_EXTEND) {
4765unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4766
4767// If the comparison constant has bits in the upper part, the
4768// zero-extended value could never match.
4769if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4770 C1.getBitWidth() - InSize))) {
4771switch (Cond) {
4772caseISD::SETUGT:
4773caseISD::SETUGE:
4774caseISD::SETEQ:
4775return DAG.getConstant(0, dl, VT);
4776caseISD::SETULT:
4777caseISD::SETULE:
4778caseISD::SETNE:
4779return DAG.getConstant(1, dl, VT);
4780caseISD::SETGT:
4781caseISD::SETGE:
4782// True if the sign bit of C1 is set.
4783return DAG.getConstant(C1.isNegative(), dl, VT);
4784caseISD::SETLT:
4785caseISD::SETLE:
4786// True if the sign bit of C1 isn't set.
4787return DAG.getConstant(C1.isNonNegative(), dl, VT);
4788default:
4789break;
4790 }
4791 }
4792
4793// Otherwise, we can perform the comparison with the low bits.
4794switch (Cond) {
4795caseISD::SETEQ:
4796caseISD::SETNE:
4797caseISD::SETUGT:
4798caseISD::SETUGE:
4799caseISD::SETULT:
4800caseISD::SETULE: {
4801EVT newVT = N0.getOperand(0).getValueType();
4802// FIXME: Should use isNarrowingProfitable.
4803if (DCI.isBeforeLegalizeOps() ||
4804 (isOperationLegal(ISD::SETCC, newVT) &&
4805isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4806 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4807EVT NewSetCCVT =getSetCCResultType(Layout, *DAG.getContext(), newVT);
4808SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4809
4810SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4811 NewConst,Cond);
4812return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4813 }
4814break;
4815 }
4816default:
4817break;// todo, be more careful with signed comparisons
4818 }
4819 }elseif (N0.getOpcode() ==ISD::SIGN_EXTEND_INREG &&
4820 (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
4821 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4822 OpVT)) {
4823EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4824unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4825EVT ExtDstTy = N0.getValueType();
4826unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4827
4828// If the constant doesn't fit into the number of bits for the source of
4829// the sign extension, it is impossible for both sides to be equal.
4830if (C1.getSignificantBits() > ExtSrcTyBits)
4831return DAG.getBoolConstant(Cond ==ISD::SETNE, dl, VT, OpVT);
4832
4833assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4834 ExtDstTy != ExtSrcTy &&"Unexpected types!");
4835APInt Imm =APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4836SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4837 DAG.getConstant(Imm, dl, ExtDstTy));
4838if (!DCI.isCalledByLegalizer())
4839 DCI.AddToWorklist(ZextOp.getNode());
4840// Otherwise, make this a use of a zext.
4841return DAG.getSetCC(dl, VT, ZextOp,
4842 DAG.getConstant(C1 & Imm, dl, ExtDstTy),Cond);
4843 }elseif ((N1C->isZero() || N1C->isOne()) &&
4844 (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE)) {
4845// SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4846// excluded as they are handled below whilst checking for foldBooleans.
4847if ((N0.getOpcode() ==ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4848isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4849 (N0.getValueType() == MVT::i1 ||
4850getBooleanContents(N0.getValueType()) ==ZeroOrOneBooleanContent) &&
4851 DAG.MaskedValueIsZero(
4852 N0,APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4853bool TrueWhenTrue = (Cond ==ISD::SETEQ) ^ (!N1C->isOne());
4854if (TrueWhenTrue)
4855return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4856// Invert the condition.
4857if (N0.getOpcode() ==ISD::SETCC) {
4858ISD::CondCodeCC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4859CC =ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4860if (DCI.isBeforeLegalizeOps() ||
4861isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4862return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1),CC);
4863 }
4864 }
4865
4866if ((N0.getOpcode() ==ISD::XOR ||
4867 (N0.getOpcode() ==ISD::AND &&
4868 N0.getOperand(0).getOpcode() ==ISD::XOR &&
4869 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4870isOneConstant(N0.getOperand(1))) {
4871// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4872// can only do this if the top bits are known zero.
4873unsignedBitWidth = N0.getValueSizeInBits();
4874if (DAG.MaskedValueIsZero(N0,
4875APInt::getHighBitsSet(BitWidth,
4876BitWidth-1))) {
4877// Okay, get the un-inverted input value.
4878SDValue Val;
4879if (N0.getOpcode() ==ISD::XOR) {
4880 Val = N0.getOperand(0);
4881 }else {
4882assert(N0.getOpcode() ==ISD::AND &&
4883 N0.getOperand(0).getOpcode() ==ISD::XOR);
4884// ((X^1)&1)^1 -> X & 1
4885 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4886 N0.getOperand(0).getOperand(0),
4887 N0.getOperand(1));
4888 }
4889
4890return DAG.getSetCC(dl, VT, Val, N1,
4891Cond ==ISD::SETEQ ?ISD::SETNE :ISD::SETEQ);
4892 }
4893 }elseif (N1C->isOne()) {
4894SDValue Op0 = N0;
4895if (Op0.getOpcode() ==ISD::TRUNCATE)
4896 Op0 = Op0.getOperand(0);
4897
4898if ((Op0.getOpcode() ==ISD::XOR) &&
4899 Op0.getOperand(0).getOpcode() ==ISD::SETCC &&
4900 Op0.getOperand(1).getOpcode() ==ISD::SETCC) {
4901SDValue XorLHS = Op0.getOperand(0);
4902SDValue XorRHS = Op0.getOperand(1);
4903// Ensure that the input setccs return an i1 type or 0/1 value.
4904if (Op0.getValueType() == MVT::i1 ||
4905 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4906ZeroOrOneBooleanContent &&
4907getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4908ZeroOrOneBooleanContent)) {
4909// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4910Cond = (Cond ==ISD::SETEQ) ?ISD::SETNE :ISD::SETEQ;
4911return DAG.getSetCC(dl, VT, XorLHS, XorRHS,Cond);
4912 }
4913 }
4914if (Op0.getOpcode() ==ISD::AND &&isOneConstant(Op0.getOperand(1))) {
4915// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4916if (Op0.getValueType().bitsGT(VT))
4917 Op0 = DAG.getNode(ISD::AND, dl, VT,
4918 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4919 DAG.getConstant(1, dl, VT));
4920elseif (Op0.getValueType().bitsLT(VT))
4921 Op0 = DAG.getNode(ISD::AND, dl, VT,
4922 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4923 DAG.getConstant(1, dl, VT));
4924
4925return DAG.getSetCC(dl, VT, Op0,
4926 DAG.getConstant(0, dl, Op0.getValueType()),
4927Cond ==ISD::SETEQ ?ISD::SETNE :ISD::SETEQ);
4928 }
4929if (Op0.getOpcode() ==ISD::AssertZext &&
4930 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4931return DAG.getSetCC(dl, VT, Op0,
4932 DAG.getConstant(0, dl, Op0.getValueType()),
4933Cond ==ISD::SETEQ ?ISD::SETNE :ISD::SETEQ);
4934 }
4935 }
4936
4937// Given:
4938// icmp eq/ne (urem %x, %y), 0
4939// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4940// icmp eq/ne %x, 0
4941if (N0.getOpcode() ==ISD::UREM && N1C->isZero() &&
4942 (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE)) {
4943KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4944KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4945if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4946return DAG.getSetCC(dl, VT, N0.getOperand(0), N1,Cond);
4947 }
4948
4949// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4950// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4951if ((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
4952 N0.getOpcode() ==ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4953 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4954 N1C->isAllOnes()) {
4955return DAG.getSetCC(dl, VT, N0.getOperand(0),
4956 DAG.getConstant(0, dl, OpVT),
4957Cond ==ISD::SETEQ ?ISD::SETLT :ISD::SETGE);
4958 }
4959
4960if (SDValue V =
4961 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1,Cond, DCI, dl))
4962return V;
4963 }
4964
4965// These simplifications apply to splat vectors as well.
4966// TODO: Handle more splat vector cases.
4967if (auto *N1C =isConstOrConstSplat(N1)) {
4968constAPInt &C1 = N1C->getAPIntValue();
4969
4970APInt MinVal, MaxVal;
4971unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4972if (ISD::isSignedIntSetCC(Cond)) {
4973 MinVal =APInt::getSignedMinValue(OperandBitSize);
4974 MaxVal =APInt::getSignedMaxValue(OperandBitSize);
4975 }else {
4976 MinVal =APInt::getMinValue(OperandBitSize);
4977 MaxVal =APInt::getMaxValue(OperandBitSize);
4978 }
4979
4980// Canonicalize GE/LE comparisons to use GT/LT comparisons.
4981if (Cond ==ISD::SETGE ||Cond ==ISD::SETUGE) {
4982// X >= MIN --> true
4983if (C1 == MinVal)
4984return DAG.getBoolConstant(true, dl, VT, OpVT);
4985
4986if (!VT.isVector()) {// TODO: Support this for vectors.
4987// X >= C0 --> X > (C0 - 1)
4988APIntC = C1 - 1;
4989ISD::CondCode NewCC = (Cond ==ISD::SETGE) ?ISD::SETGT :ISD::SETUGT;
4990if ((DCI.isBeforeLegalizeOps() ||
4991isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
4992 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4993isLegalICmpImmediate(C.getSExtValue())))) {
4994return DAG.getSetCC(dl, VT, N0,
4995 DAG.getConstant(C, dl, N1.getValueType()),
4996 NewCC);
4997 }
4998 }
4999 }
5000
5001if (Cond ==ISD::SETLE ||Cond ==ISD::SETULE) {
5002// X <= MAX --> true
5003if (C1 == MaxVal)
5004return DAG.getBoolConstant(true, dl, VT, OpVT);
5005
5006// X <= C0 --> X < (C0 + 1)
5007if (!VT.isVector()) {// TODO: Support this for vectors.
5008APIntC = C1 + 1;
5009ISD::CondCode NewCC = (Cond ==ISD::SETLE) ?ISD::SETLT :ISD::SETULT;
5010if ((DCI.isBeforeLegalizeOps() ||
5011isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5012 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5013isLegalICmpImmediate(C.getSExtValue())))) {
5014return DAG.getSetCC(dl, VT, N0,
5015 DAG.getConstant(C, dl, N1.getValueType()),
5016 NewCC);
5017 }
5018 }
5019 }
5020
5021if (Cond ==ISD::SETLT ||Cond ==ISD::SETULT) {
5022if (C1 == MinVal)
5023return DAG.getBoolConstant(false, dl, VT, OpVT);// X < MIN --> false
5024
5025// TODO: Support this for vectors after legalize ops.
5026if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5027// Canonicalize setlt X, Max --> setne X, Max
5028if (C1 == MaxVal)
5029return DAG.getSetCC(dl, VT, N0, N1,ISD::SETNE);
5030
5031// If we have setult X, 1, turn it into seteq X, 0
5032if (C1 == MinVal+1)
5033return DAG.getSetCC(dl, VT, N0,
5034 DAG.getConstant(MinVal, dl, N0.getValueType()),
5035ISD::SETEQ);
5036 }
5037 }
5038
5039if (Cond ==ISD::SETGT ||Cond ==ISD::SETUGT) {
5040if (C1 == MaxVal)
5041return DAG.getBoolConstant(false, dl, VT, OpVT);// X > MAX --> false
5042
5043// TODO: Support this for vectors after legalize ops.
5044if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5045// Canonicalize setgt X, Min --> setne X, Min
5046if (C1 == MinVal)
5047return DAG.getSetCC(dl, VT, N0, N1,ISD::SETNE);
5048
5049// If we have setugt X, Max-1, turn it into seteq X, Max
5050if (C1 == MaxVal-1)
5051return DAG.getSetCC(dl, VT, N0,
5052 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5053ISD::SETEQ);
5054 }
5055 }
5056
5057if (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) {
5058// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5059if (C1.isZero())
5060if (SDValueCC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5061 VT, N0, N1,Cond, DCI, dl))
5062returnCC;
5063
5064// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5065// For example, when high 32-bits of i64 X are known clear:
5066// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5067// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5068bool CmpZero = N1C->isZero();
5069bool CmpNegOne = N1C->isAllOnes();
5070if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5071// Match or(lo,shl(hi,bw/2)) pattern.
5072auto IsConcat = [&](SDValue V,SDValue &Lo,SDValue &Hi) {
5073unsigned EltBits = V.getScalarValueSizeInBits();
5074if (V.getOpcode() !=ISD::OR || (EltBits % 2) != 0)
5075returnfalse;
5076SDValueLHS = V.getOperand(0);
5077SDValueRHS = V.getOperand(1);
5078APInt HiBits =APInt::getHighBitsSet(EltBits, EltBits / 2);
5079// Unshifted element must have zero upperbits.
5080if (RHS.getOpcode() ==ISD::SHL &&
5081 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5082RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5083 DAG.MaskedValueIsZero(LHS, HiBits)) {
5084Lo =LHS;
5085Hi =RHS.getOperand(0);
5086returntrue;
5087 }
5088if (LHS.getOpcode() ==ISD::SHL &&
5089 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5090LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5091 DAG.MaskedValueIsZero(RHS, HiBits)) {
5092Lo =RHS;
5093Hi =LHS.getOperand(0);
5094returntrue;
5095 }
5096returnfalse;
5097 };
5098
5099auto MergeConcat = [&](SDValueLo,SDValueHi) {
5100unsigned EltBits = N0.getScalarValueSizeInBits();
5101unsigned HalfBits = EltBits / 2;
5102APInt HiBits =APInt::getHighBitsSet(EltBits, HalfBits);
5103SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5104SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT,Hi, LoBits);
5105SDValue NewN0 =
5106 DAG.getNode(CmpZero ?ISD::OR :ISD::AND, dl, OpVT,Lo, HiMask);
5107SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5108return DAG.getSetCC(dl, VT, NewN0, NewN1,Cond);
5109 };
5110
5111SDValueLo,Hi;
5112if (IsConcat(N0,Lo,Hi))
5113return MergeConcat(Lo,Hi);
5114
5115if (N0.getOpcode() ==ISD::AND || N0.getOpcode() ==ISD::OR) {
5116SDValue Lo0, Lo1, Hi0, Hi1;
5117if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5118 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5119return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5120 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5121 }
5122 }
5123 }
5124 }
5125
5126// If we have "setcc X, C0", check to see if we can shrink the immediate
5127// by changing cc.
5128// TODO: Support this for vectors after legalize ops.
5129if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5130// SETUGT X, SINTMAX -> SETLT X, 0
5131// SETUGE X, SINTMIN -> SETLT X, 0
5132if ((Cond ==ISD::SETUGT && C1.isMaxSignedValue()) ||
5133 (Cond ==ISD::SETUGE && C1.isMinSignedValue()))
5134return DAG.getSetCC(dl, VT, N0,
5135 DAG.getConstant(0, dl, N1.getValueType()),
5136ISD::SETLT);
5137
5138// SETULT X, SINTMIN -> SETGT X, -1
5139// SETULE X, SINTMAX -> SETGT X, -1
5140if ((Cond ==ISD::SETULT && C1.isMinSignedValue()) ||
5141 (Cond ==ISD::SETULE && C1.isMaxSignedValue()))
5142return DAG.getSetCC(dl, VT, N0,
5143 DAG.getAllOnesConstant(dl, N1.getValueType()),
5144ISD::SETGT);
5145 }
5146 }
5147
5148// Back to non-vector simplifications.
5149// TODO: Can we do these for vector splats?
5150if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5151constAPInt &C1 = N1C->getAPIntValue();
5152EVT ShValTy = N0.getValueType();
5153
5154// Fold bit comparisons when we can. This will result in an
5155// incorrect value when boolean false is negative one, unless
5156// the bitsize is 1 in which case the false value is the same
5157// in practice regardless of the representation.
5158if ((VT.getSizeInBits() == 1 ||
5159getBooleanContents(N0.getValueType()) ==ZeroOrOneBooleanContent) &&
5160 (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
5161 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5162 N0.getOpcode() ==ISD::AND) {
5163if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5164if (Cond ==ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5165// Perform the xform if the AND RHS is a single bit.
5166unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5167if (AndRHS->getAPIntValue().isPowerOf2() &&
5168 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5169return DAG.getNode(
5170ISD::TRUNCATE, dl, VT,
5171 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5172 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5173 }
5174 }elseif (Cond ==ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5175// (X & 8) == 8 --> (X & 8) >> 3
5176// Perform the xform if C1 is a single bit.
5177unsigned ShCt = C1.logBase2();
5178if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5179return DAG.getNode(
5180ISD::TRUNCATE, dl, VT,
5181 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5182 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5183 }
5184 }
5185 }
5186 }
5187
5188if (C1.getSignificantBits() <= 64 &&
5189 !isLegalICmpImmediate(C1.getSExtValue())) {
5190// (X & -256) == 256 -> (X >> 8) == 1
5191if ((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
5192 N0.getOpcode() ==ISD::AND && N0.hasOneUse()) {
5193if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5194constAPInt &AndRHSC = AndRHS->getAPIntValue();
5195if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5196unsigned ShiftBits = AndRHSC.countr_zero();
5197if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5198SDValue Shift = DAG.getNode(
5199ISD::SRL, dl, ShValTy, N0.getOperand(0),
5200 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5201SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5202return DAG.getSetCC(dl, VT, Shift, CmpRHS,Cond);
5203 }
5204 }
5205 }
5206 }elseif (Cond ==ISD::SETULT ||Cond ==ISD::SETUGE ||
5207Cond ==ISD::SETULE ||Cond ==ISD::SETUGT) {
5208bool AdjOne = (Cond ==ISD::SETULE ||Cond ==ISD::SETUGT);
5209// X < 0x100000000 -> (X >> 32) < 1
5210// X >= 0x100000000 -> (X >> 32) >= 1
5211// X <= 0x0ffffffff -> (X >> 32) < 1
5212// X > 0x0ffffffff -> (X >> 32) >= 1
5213unsigned ShiftBits;
5214APInt NewC = C1;
5215ISD::CondCode NewCond =Cond;
5216if (AdjOne) {
5217 ShiftBits = C1.countr_one();
5218 NewC = NewC + 1;
5219 NewCond = (Cond ==ISD::SETULE) ?ISD::SETULT :ISD::SETUGE;
5220 }else {
5221 ShiftBits = C1.countr_zero();
5222 }
5223 NewC.lshrInPlace(ShiftBits);
5224if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5225isLegalICmpImmediate(NewC.getSExtValue()) &&
5226 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5227SDValue Shift =
5228 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5229 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5230SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5231return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5232 }
5233 }
5234 }
5235 }
5236
5237if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5238auto *CFP = cast<ConstantFPSDNode>(N1);
5239assert(!CFP->getValueAPF().isNaN() &&"Unexpected NaN value");
5240
5241// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5242// constant if knowing that the operand is non-nan is enough. We prefer to
5243// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5244// materialize 0.0.
5245if (Cond ==ISD::SETO ||Cond ==ISD::SETUO)
5246return DAG.getSetCC(dl, VT, N0, N0,Cond);
5247
5248// setcc (fneg x), C -> setcc swap(pred) x, -C
5249if (N0.getOpcode() ==ISD::FNEG) {
5250ISD::CondCode SwapCond =ISD::getSetCCSwappedOperands(Cond);
5251if (DCI.isBeforeLegalizeOps() ||
5252isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5253SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5254return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5255 }
5256 }
5257
5258// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5259if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5260 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5261bool IsFabs = N0.getOpcode() ==ISD::FABS;
5262SDValueOp = IsFabs ? N0.getOperand(0) : N0;
5263if ((Cond ==ISD::SETOEQ ||Cond ==ISD::SETUEQ) && CFP->isInfinity()) {
5264FPClassTest Flag = CFP->isNegative() ? (IsFabs ?fcNone :fcNegInf)
5265 : (IsFabs ?fcInf :fcPosInf);
5266if (Cond ==ISD::SETUEQ)
5267 Flag |=fcNan;
5268return DAG.getNode(ISD::IS_FPCLASS, dl, VT,Op,
5269 DAG.getTargetConstant(Flag, dl, MVT::i32));
5270 }
5271 }
5272
5273// If the condition is not legal, see if we can find an equivalent one
5274// which is legal.
5275if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5276// If the comparison was an awkward floating-point == or != and one of
5277// the comparison operands is infinity or negative infinity, convert the
5278// condition to a less-awkward <= or >=.
5279if (CFP->getValueAPF().isInfinity()) {
5280bool IsNegInf = CFP->getValueAPF().isNegative();
5281ISD::CondCode NewCond =ISD::SETCC_INVALID;
5282switch (Cond) {
5283caseISD::SETOEQ: NewCond = IsNegInf ?ISD::SETOLE :ISD::SETOGE;break;
5284caseISD::SETUEQ: NewCond = IsNegInf ?ISD::SETULE :ISD::SETUGE;break;
5285caseISD::SETUNE: NewCond = IsNegInf ?ISD::SETUGT :ISD::SETULT;break;
5286caseISD::SETONE: NewCond = IsNegInf ?ISD::SETOGT :ISD::SETOLT;break;
5287default:break;
5288 }
5289if (NewCond !=ISD::SETCC_INVALID &&
5290isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5291return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5292 }
5293 }
5294 }
5295
5296if (N0 == N1) {
5297// The sext(setcc()) => setcc() optimization relies on the appropriate
5298// constant being emitted.
5299assert(!N0.getValueType().isInteger() &&
5300"Integer types should be handled by FoldSetCC");
5301
5302bool EqTrue =ISD::isTrueWhenEqual(Cond);
5303unsigned UOF =ISD::getUnorderedFlavor(Cond);
5304if (UOF == 2)// FP operators that are undefined on NaNs.
5305return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5306if (UOF ==unsigned(EqTrue))
5307return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5308// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5309// if it is not already.
5310ISD::CondCode NewCond = UOF == 0 ?ISD::SETO :ISD::SETUO;
5311if (NewCond !=Cond &&
5312 (DCI.isBeforeLegalizeOps() ||
5313isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5314return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5315 }
5316
5317// ~X > ~Y --> Y > X
5318// ~X < ~Y --> Y < X
5319// ~X < C --> X > ~C
5320// ~X > C --> X < ~C
5321if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5322 N0.getValueType().isInteger()) {
5323if (isBitwiseNot(N0)) {
5324if (isBitwiseNot(N1))
5325return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0),Cond);
5326
5327if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5328 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5329SDValue Not = DAG.getNOT(dl, N1, OpVT);
5330return DAG.getSetCC(dl, VT, Not, N0.getOperand(0),Cond);
5331 }
5332 }
5333 }
5334
5335if ((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
5336 N0.getValueType().isInteger()) {
5337if (N0.getOpcode() ==ISD::ADD || N0.getOpcode() ==ISD::SUB ||
5338 N0.getOpcode() ==ISD::XOR) {
5339// Simplify (X+Y) == (X+Z) --> Y == Z
5340if (N0.getOpcode() == N1.getOpcode()) {
5341if (N0.getOperand(0) == N1.getOperand(0))
5342return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1),Cond);
5343if (N0.getOperand(1) == N1.getOperand(1))
5344return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0),Cond);
5345if (isCommutativeBinOp(N0.getOpcode())) {
5346// If X op Y == Y op X, try other combinations.
5347if (N0.getOperand(0) == N1.getOperand(1))
5348return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5349Cond);
5350if (N0.getOperand(1) == N1.getOperand(0))
5351return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5352Cond);
5353 }
5354 }
5355
5356// If RHS is a legal immediate value for a compare instruction, we need
5357// to be careful about increasing register pressure needlessly.
5358bool LegalRHSImm =false;
5359
5360if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5361if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5362// Turn (X+C1) == C2 --> X == C2-C1
5363if (N0.getOpcode() ==ISD::ADD && N0.getNode()->hasOneUse())
5364return DAG.getSetCC(
5365 dl, VT, N0.getOperand(0),
5366 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5367 dl, N0.getValueType()),
5368Cond);
5369
5370// Turn (X^C1) == C2 --> X == C1^C2
5371if (N0.getOpcode() ==ISD::XOR && N0.getNode()->hasOneUse())
5372return DAG.getSetCC(
5373 dl, VT, N0.getOperand(0),
5374 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5375 dl, N0.getValueType()),
5376Cond);
5377 }
5378
5379// Turn (C1-X) == C2 --> X == C1-C2
5380if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5381if (N0.getOpcode() ==ISD::SUB && N0.getNode()->hasOneUse())
5382return DAG.getSetCC(
5383 dl, VT, N0.getOperand(1),
5384 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5385 dl, N0.getValueType()),
5386Cond);
5387
5388// Could RHSC fold directly into a compare?
5389if (RHSC->getValueType(0).getSizeInBits() <= 64)
5390 LegalRHSImm =isLegalICmpImmediate(RHSC->getSExtValue());
5391 }
5392
5393// (X+Y) == X --> Y == 0 and similar folds.
5394// Don't do this if X is an immediate that can fold into a cmp
5395// instruction and X+Y has other uses. It could be an induction variable
5396// chain, and the transform would increase register pressure.
5397if (!LegalRHSImm || N0.hasOneUse())
5398if (SDValue V = foldSetCCWithBinOp(VT, N0, N1,Cond, dl, DCI))
5399return V;
5400 }
5401
5402if (N1.getOpcode() ==ISD::ADD || N1.getOpcode() ==ISD::SUB ||
5403 N1.getOpcode() ==ISD::XOR)
5404if (SDValue V = foldSetCCWithBinOp(VT, N1, N0,Cond, dl, DCI))
5405return V;
5406
5407if (SDValue V = foldSetCCWithAnd(VT, N0, N1,Cond, dl, DCI))
5408return V;
5409 }
5410
5411// Fold remainder of division by a constant.
5412if ((N0.getOpcode() ==ISD::UREM || N0.getOpcode() ==ISD::SREM) &&
5413 N0.hasOneUse() && (Cond ==ISD::SETEQ ||Cond ==ISD::SETNE)) {
5414// When division is cheap or optimizing for minimum size,
5415// fall through to DIVREM creation by skipping this fold.
5416if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5417if (N0.getOpcode() ==ISD::UREM) {
5418if (SDValue Folded = buildUREMEqFold(VT, N0, N1,Cond, DCI, dl))
5419return Folded;
5420 }elseif (N0.getOpcode() ==ISD::SREM) {
5421if (SDValue Folded = buildSREMEqFold(VT, N0, N1,Cond, DCI, dl))
5422return Folded;
5423 }
5424 }
5425 }
5426
5427// Fold away ALL boolean setcc's.
5428if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5429SDValue Temp;
5430switch (Cond) {
5431default:llvm_unreachable("Unknown integer setcc!");
5432caseISD::SETEQ:// X == Y -> ~(X^Y)
5433 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5434 N0 = DAG.getNOT(dl, Temp, OpVT);
5435if (!DCI.isCalledByLegalizer())
5436 DCI.AddToWorklist(Temp.getNode());
5437break;
5438caseISD::SETNE:// X != Y --> (X^Y)
5439 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5440break;
5441caseISD::SETGT:// X >s Y --> X == 0 & Y == 1 --> ~X & Y
5442caseISD::SETULT:// X <u Y --> X == 0 & Y == 1 --> ~X & Y
5443 Temp = DAG.getNOT(dl, N0, OpVT);
5444 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5445if (!DCI.isCalledByLegalizer())
5446 DCI.AddToWorklist(Temp.getNode());
5447break;
5448caseISD::SETLT:// X <s Y --> X == 1 & Y == 0 --> ~Y & X
5449caseISD::SETUGT:// X >u Y --> X == 1 & Y == 0 --> ~Y & X
5450 Temp = DAG.getNOT(dl, N1, OpVT);
5451 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5452if (!DCI.isCalledByLegalizer())
5453 DCI.AddToWorklist(Temp.getNode());
5454break;
5455caseISD::SETULE:// X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5456caseISD::SETGE:// X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5457 Temp = DAG.getNOT(dl, N0, OpVT);
5458 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5459if (!DCI.isCalledByLegalizer())
5460 DCI.AddToWorklist(Temp.getNode());
5461break;
5462caseISD::SETUGE:// X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5463caseISD::SETLE:// X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5464 Temp = DAG.getNOT(dl, N1, OpVT);
5465 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5466break;
5467 }
5468if (VT.getScalarType() != MVT::i1) {
5469if (!DCI.isCalledByLegalizer())
5470 DCI.AddToWorklist(N0.getNode());
5471// FIXME: If running after legalize, we probably can't do this.
5472ISD::NodeType ExtendCode =getExtendForContent(getBooleanContents(OpVT));
5473 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5474 }
5475return N0;
5476 }
5477
5478// Could not fold it.
5479returnSDValue();
5480}
5481
5482/// Returns true (and the GlobalValue and the offset) if the node is a
5483/// GlobalAddress + offset.
5484boolTargetLowering::isGAPlusOffset(SDNode *WN,constGlobalValue *&GA,
5485 int64_t &Offset) const{
5486
5487SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5488
5489if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5490 GA = GASD->getGlobal();
5491Offset += GASD->getOffset();
5492returntrue;
5493 }
5494
5495if (N->getOpcode() ==ISD::ADD) {
5496SDValue N1 =N->getOperand(0);
5497SDValue N2 =N->getOperand(1);
5498if (isGAPlusOffset(N1.getNode(), GA,Offset)) {
5499if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5500Offset += V->getSExtValue();
5501returntrue;
5502 }
5503 }elseif (isGAPlusOffset(N2.getNode(), GA,Offset)) {
5504if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5505Offset += V->getSExtValue();
5506returntrue;
5507 }
5508 }
5509 }
5510
5511returnfalse;
5512}
5513
/// Hook for targets to perform custom combines on target-specific DAG nodes.
/// The base implementation performs no transformation; returning a null
/// SDValue tells the DAG combiner that nothing was changed.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
5519
5520//===----------------------------------------------------------------------===//
5521// Inline Assembler Implementation Methods
5522//===----------------------------------------------------------------------===//
5523
5524TargetLowering::ConstraintType
5525TargetLowering::getConstraintType(StringRef Constraint) const{
5526unsigned S = Constraint.size();
5527
5528if (S == 1) {
5529switch (Constraint[0]) {
5530default:break;
5531case'r':
5532return C_RegisterClass;
5533case'm':// memory
5534case'o':// offsetable
5535case'V':// not offsetable
5536return C_Memory;
5537case'p':// Address.
5538return C_Address;
5539case'n':// Simple Integer
5540case'E':// Floating Point Constant
5541case'F':// Floating Point Constant
5542return C_Immediate;
5543case'i':// Simple Integer or Relocatable Constant
5544case's':// Relocatable Constant
5545case'X':// Allow ANY value.
5546case'I':// Target registers.
5547case'J':
5548case'K':
5549case'L':
5550case'M':
5551case'N':
5552case'O':
5553case'P':
5554case'<':
5555case'>':
5556return C_Other;
5557 }
5558 }
5559
5560if (S > 1 && Constraint[0] =='{' && Constraint[S - 1] =='}') {
5561if (S == 8 && Constraint.substr(1, 6) =="memory")// "{memory}"
5562return C_Memory;
5563return C_Register;
5564 }
5565return C_Unknown;
5566}
5567
5568/// Try to replace an X constraint, which matches anything, with another that
5569/// has more specific requirements based on the type of the corresponding
5570/// operand.
5571constchar *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
5572if (ConstraintVT.isInteger())
5573return"r";
5574if (ConstraintVT.isFloatingPoint())
5575return"f";// works for many targets
5576returnnullptr;
5577}
5578
/// Hook for targets that support custom lowering of inline-asm output
/// operands (e.g. flag-output constraints). The default implementation
/// declines by returning a null SDValue, leaving generic handling in place.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
5584
5585/// Lower the specified operand into the Ops vector.
5586/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled by this generic code.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':   // Allows any operand
  case 'i':   // Simple Integer or Relocatable Constant
  case 'n':   // Simple Integer
  case 's': { // Relocatable Constant

    ConstantSDNode *C;
    // Running sum of the constant offsets peeled off of ADD/SUB nodes below.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A bare constant satisfies every letter except 's' (which requires a
      // relocatable symbol rather than a plain integer).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        // i1 values follow the target's boolean representation; everything
        // else is sign extended as gcc does.
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands satisfy every letter except 'n' (which requires a
      // plain integer with a known value).
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one constant off of an ADD/SUB and keep walking toward the
      // symbol; bail out if neither operand is a constant.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}
5663
/// Hook allowing targets to append extra SDValue operands when lowering a
/// target intrinsic call. The default implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5667
5668std::pair<unsigned, const TargetRegisterClass *>
5669TargetLowering::getRegForInlineAsmConstraint(constTargetRegisterInfo *RI,
5670StringRef Constraint,
5671MVT VT) const{
5672if (!Constraint.starts_with("{"))
5673return std::make_pair(0u,static_cast<TargetRegisterClass *>(nullptr));
5674assert(*(Constraint.end() - 1) =='}' &&"Not a brace enclosed constraint?");
5675
5676// Remove the braces from around the name.
5677StringRefRegName(Constraint.data() + 1, Constraint.size() - 2);
5678
5679 std::pair<unsigned, const TargetRegisterClass *> R =
5680 std::make_pair(0u,static_cast<constTargetRegisterClass *>(nullptr));
5681
5682// Figure out which register class contains this reg.
5683for (constTargetRegisterClass *RC : RI->regclasses()) {
5684// If none of the value types for this register class are valid, we
5685// can't use it. For example, 64-bit reg classes on 32-bit targets.
5686if (!isLegalRC(*RI, *RC))
5687continue;
5688
5689for (constMCPhysReg &PR : *RC) {
5690if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5691 std::pair<unsigned, const TargetRegisterClass *> S =
5692 std::make_pair(PR, RC);
5693
5694// If this register class has the requested value type, return it,
5695// otherwise keep searching and return the first class found
5696// if no other is found which explicitly has the requested type.
5697if (RI->isTypeLegalForClass(*RC, VT))
5698return S;
5699if (!R.second)
5700 R = S;
5701 }
5702 }
5703 }
5704
5705return R;
5706}
5707
5708//===----------------------------------------------------------------------===//
5709// Constraint Selection.
5710
/// Return true if this is an input operand that is a matching constraint
/// like "4".
5713boolTargetLowering::AsmOperandInfo::isMatchingInputConstraint() const{
5714assert(!ConstraintCode.empty() &&"No known constraint!");
5715return isdigit(static_cast<unsignedchar>(ConstraintCode[0]));
5716}
5717
5718/// If this is an input matching constraint, this method returns the output
5719/// operand it matches.
5720unsignedTargetLowering::AsmOperandInfo::getMatchedOperand() const{
5721assert(!ConstraintCode.empty() &&"No known constraint!");
5722return atoi(ConstraintCode.c_str());
5723}
5724
5725/// Split up the constraint string from the inline assembly value into the
5726/// specific constraints and their prefixes, and also tie in the associated
5727/// operand values.
5728/// If this returns an empty vector, and if the constraint string itself
5729/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0;   // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs come back as elements of a struct.
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, STy->getElementType(ResNo))
                .getSimpleVT();
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      // Labels don't consume a call argument, so skip the VT computation.
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands carry the pointee type via elementtype.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One invalid operand weight invalidates the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        // Tied operands must agree in int/FP-ness and register class, even
        // when the exact value types differ.
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5915
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
5923staticunsignedgetConstraintPiority(TargetLowering::ConstraintType CT) {
5924switch (CT) {
5925caseTargetLowering::C_Immediate:
5926caseTargetLowering::C_Other:
5927return 4;
5928caseTargetLowering::C_Memory:
5929caseTargetLowering::C_Address:
5930return 3;
5931caseTargetLowering::C_RegisterClass:
5932return 2;
5933caseTargetLowering::C_Register:
5934return 1;
5935caseTargetLowering::C_Unknown:
5936return 0;
5937 }
5938llvm_unreachable("Invalid constraint type");
5939}
5940
5941/// Examine constraint type and operand type and determine a weight value.
5942/// This object must already have been set up with the operand type
5943/// and the current alternative constraint selected.
5944TargetLowering::ConstraintWeight
5945TargetLowering::getMultipleConstraintMatchWeight(
5946AsmOperandInfo &info,int maIndex) const{
5947InlineAsm::ConstraintCodeVector *rCodes;
5948if (maIndex >= (int)info.multipleAlternatives.size())
5949 rCodes = &info.Codes;
5950else
5951 rCodes = &info.multipleAlternatives[maIndex].Codes;
5952ConstraintWeight BestWeight = CW_Invalid;
5953
5954// Loop over the options, keeping track of the most general one.
5955for (const std::string &rCode : *rCodes) {
5956ConstraintWeight weight =
5957 getSingleConstraintMatchWeight(info, rCode.c_str());
5958if (weight > BestWeight)
5959 BestWeight = weight;
5960 }
5961
5962return BestWeight;
5963}
5964
5965/// Examine constraint type and operand type and determine a weight value.
5966/// This object must already have been set up with the operand type
5967/// and the current alternative constraint selected.
5968TargetLowering::ConstraintWeight
5969TargetLowering::getSingleConstraintMatchWeight(
5970AsmOperandInfo &info,constchar *constraint) const{
5971ConstraintWeight weight = CW_Invalid;
5972Value *CallOperandVal =info.CallOperandVal;
5973// If we don't have a value, we can't do a match,
5974// but allow it at the lowest weight.
5975if (!CallOperandVal)
5976return CW_Default;
5977// Look at the constraint type.
5978switch (*constraint) {
5979case'i':// immediate integer.
5980case'n':// immediate integer with a known value.
5981if (isa<ConstantInt>(CallOperandVal))
5982 weight = CW_Constant;
5983break;
5984case's':// non-explicit intregal immediate.
5985if (isa<GlobalValue>(CallOperandVal))
5986 weight = CW_Constant;
5987break;
5988case'E':// immediate float if host format.
5989case'F':// immediate float.
5990if (isa<ConstantFP>(CallOperandVal))
5991 weight = CW_Constant;
5992break;
5993case'<':// memory operand with autodecrement.
5994case'>':// memory operand with autoincrement.
5995case'm':// memory operand.
5996case'o':// offsettable memory operand
5997case'V':// non-offsettable memory operand
5998 weight = CW_Memory;
5999break;
6000case'r':// general register.
6001case'g':// general register, memory operand or immediate integer.
6002// note: Clang converts "g" to "imr".
6003if (CallOperandVal->getType()->isIntegerTy())
6004 weight = CW_Register;
6005break;
6006case'X':// any operand.
6007default:
6008 weight = CW_Default;
6009break;
6010 }
6011return weight;
6012}
6013
6014/// If there are multiple different constraints that we could pick for this
6015/// operand (e.g. "imr") try to pick the 'best' one.
6016/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6017/// into seven classes:
6018/// Register -> one specific register
6019/// RegisterClass -> a group of regs
6020/// Memory -> memory
6021/// Address -> a symbolic memory reference
6022/// Immediate -> immediate values
6023/// Other -> magic values (such as "Flag Output Operands")
6024/// Unknown -> something we don't recognize yet and can't handle
6025/// Ideally, we would pick the most specific constraint possible: if we have
6026/// something that fits into a register, we would pick it. The problem here
6027/// is that if we have something that could either be in a register or in
6028/// memory that use of the register could cause selection of *other*
6029/// operands to fail: they might only succeed if we pick memory. Because of
6030/// this the heuristic we use is:
6031///
6032/// 1) If there is an 'other' constraint, and if the operand is valid for
6033/// that constraint, use it. This makes us take advantage of 'i'
6034/// constraints when available.
6035/// 2) Otherwise, pick the most general constraint present. This prefers
6036/// 'm' over 'r', for example.
6037///
6038TargetLowering::ConstraintGroupTargetLowering::getConstraintPreferences(
6039TargetLowering::AsmOperandInfo &OpInfo) const{
6040ConstraintGroup Ret;
6041
6042 Ret.reserve(OpInfo.Codes.size());
6043for (StringRef Code : OpInfo.Codes) {
6044TargetLowering::ConstraintType CType = getConstraintType(Code);
6045
6046// Indirect 'other' or 'immediate' constraints are not allowed.
6047if (OpInfo.isIndirect && !(CType ==TargetLowering::C_Memory ||
6048 CType ==TargetLowering::C_Register ||
6049 CType ==TargetLowering::C_RegisterClass))
6050continue;
6051
6052// Things with matching constraints can only be registers, per gcc
6053// documentation. This mainly affects "g" constraints.
6054if (CType ==TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6055continue;
6056
6057 Ret.emplace_back(Code, CType);
6058 }
6059
6060 std::stable_sort(
6061 Ret.begin(), Ret.end(), [](ConstraintPair a,ConstraintPair b) {
6062 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6063 });
6064
6065return Ret;
6066}
6067
6068/// If we have an immediate, see if we can lower it. Return true if we can,
6069/// false otherwise.
6070staticboollowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6071SDValueOp,SelectionDAG *DAG,
6072constTargetLowering &TLI) {
6073
6074assert((P.second ==TargetLowering::C_Other ||
6075P.second ==TargetLowering::C_Immediate) &&
6076"need immediate or other");
6077
6078if (!Op.getNode())
6079returnfalse;
6080
6081 std::vector<SDValue> ResultOps;
6082 TLI.LowerAsmOperandForConstraint(Op,P.first, ResultOps, *DAG);
6083return !ResultOps.empty();
6084}
6085
6086/// Determines the constraint code and constraint type to use for the specific
6087/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6088voidTargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6089SDValueOp,
6090SelectionDAG *DAG) const{
6091assert(!OpInfo.Codes.empty() &&"Must have at least one constraint");
6092
6093// Single-letter constraints ('r') are very common.
6094if (OpInfo.Codes.size() == 1) {
6095 OpInfo.ConstraintCode = OpInfo.Codes[0];
6096 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6097 }else {
6098ConstraintGroupG = getConstraintPreferences(OpInfo);
6099if (G.empty())
6100return;
6101
6102unsigned BestIdx = 0;
6103for (constunsigned E =G.size();
6104 BestIdx < E && (G[BestIdx].second ==TargetLowering::C_Other ||
6105G[BestIdx].second ==TargetLowering::C_Immediate);
6106 ++BestIdx) {
6107if (lowerImmediateIfPossible(G[BestIdx],Op, DAG, *this))
6108break;
6109// If we're out of constraints, just pick the first one.
6110if (BestIdx + 1 == E) {
6111 BestIdx = 0;
6112break;
6113 }
6114 }
6115
6116 OpInfo.ConstraintCode =G[BestIdx].first;
6117 OpInfo.ConstraintType =G[BestIdx].second;
6118 }
6119
6120// 'X' matches anything.
6121if (OpInfo.ConstraintCode =="X" && OpInfo.CallOperandVal) {
6122// Constants are handled elsewhere. For Functions, the type here is the
6123// type of the result, which is not what we want to look at; leave them
6124// alone.
6125Value *v = OpInfo.CallOperandVal;
6126if (isa<ConstantInt>(v) || isa<Function>(v)) {
6127return;
6128 }
6129
6130if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6131 OpInfo.ConstraintCode ="i";
6132return;
6133 }
6134
6135// Otherwise, try to resolve it to something we know about by looking at
6136// the actual operand type.
6137if (constchar *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6138 OpInfo.ConstraintCode = Repl;
6139 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6140 }
6141 }
6142}
6143
6144/// Given an exact SDIV by a constant, create a multiplication
6145/// with the multiplicative inverse of the constant.
6146/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6147staticSDValueBuildExactSDIV(constTargetLowering &TLI,SDNode *N,
6148constSDLoc &dl,SelectionDAG &DAG,
6149SmallVectorImpl<SDNode *> &Created) {
6150SDValue Op0 =N->getOperand(0);
6151SDValue Op1 =N->getOperand(1);
6152EVT VT =N->getValueType(0);
6153EVT SVT = VT.getScalarType();
6154EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6155EVT ShSVT = ShVT.getScalarType();
6156
6157bool UseSRA =false;
6158SmallVector<SDValue, 16> Shifts, Factors;
6159
6160auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6161if (C->isZero())
6162returnfalse;
6163APInt Divisor =C->getAPIntValue();
6164unsigned Shift = Divisor.countr_zero();
6165if (Shift) {
6166 Divisor.ashrInPlace(Shift);
6167 UseSRA =true;
6168 }
6169APInt Factor = Divisor.multiplicativeInverse();
6170 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6171 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6172returntrue;
6173 };
6174
6175// Collect all magic values from the build vector.
6176if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6177returnSDValue();
6178
6179SDValue Shift, Factor;
6180if (Op1.getOpcode() ==ISD::BUILD_VECTOR) {
6181 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6182 Factor = DAG.getBuildVector(VT, dl, Factors);
6183 }elseif (Op1.getOpcode() ==ISD::SPLAT_VECTOR) {
6184assert(Shifts.size() == 1 && Factors.size() == 1 &&
6185"Expected matchUnaryPredicate to return one element for scalable "
6186"vectors");
6187 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6188 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6189 }else {
6190assert(isa<ConstantSDNode>(Op1) &&"Expected a constant");
6191 Shift = Shifts[0];
6192 Factor = Factors[0];
6193 }
6194
6195SDValue Res = Op0;
6196if (UseSRA) {
6197 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift,SDNodeFlags::Exact);
6198 Created.push_back(Res.getNode());
6199 }
6200
6201return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6202}
6203
6204/// Given an exact UDIV by a constant, create a multiplication
6205/// with the multiplicative inverse of the constant.
6206/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6207staticSDValueBuildExactUDIV(constTargetLowering &TLI,SDNode *N,
6208constSDLoc &dl,SelectionDAG &DAG,
6209SmallVectorImpl<SDNode *> &Created) {
6210EVT VT =N->getValueType(0);
6211EVT SVT = VT.getScalarType();
6212EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6213EVT ShSVT = ShVT.getScalarType();
6214
6215bool UseSRL =false;
6216SmallVector<SDValue, 16> Shifts, Factors;
6217
6218auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6219if (C->isZero())
6220returnfalse;
6221APInt Divisor =C->getAPIntValue();
6222unsigned Shift = Divisor.countr_zero();
6223if (Shift) {
6224 Divisor.lshrInPlace(Shift);
6225 UseSRL =true;
6226 }
6227// Calculate the multiplicative inverse modulo BW.
6228APInt Factor = Divisor.multiplicativeInverse();
6229 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6230 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6231returntrue;
6232 };
6233
6234SDValue Op1 =N->getOperand(1);
6235
6236// Collect all magic values from the build vector.
6237if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6238returnSDValue();
6239
6240SDValue Shift, Factor;
6241if (Op1.getOpcode() ==ISD::BUILD_VECTOR) {
6242 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6243 Factor = DAG.getBuildVector(VT, dl, Factors);
6244 }elseif (Op1.getOpcode() ==ISD::SPLAT_VECTOR) {
6245assert(Shifts.size() == 1 && Factors.size() == 1 &&
6246"Expected matchUnaryPredicate to return one element for scalable "
6247"vectors");
6248 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6249 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6250 }else {
6251assert(isa<ConstantSDNode>(Op1) &&"Expected a constant");
6252 Shift = Shifts[0];
6253 Factor = Factors[0];
6254 }
6255
6256SDValue Res =N->getOperand(0);
6257if (UseSRL) {
6258 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift,SDNodeFlags::Exact);
6259 Created.push_back(Res.getNode());
6260 }
6261
6262return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6263}
6264
6265SDValueTargetLowering::BuildSDIVPow2(SDNode *N,constAPInt &Divisor,
6266SelectionDAG &DAG,
6267SmallVectorImpl<SDNode *> &Created) const{
6268AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6269if (isIntDivCheap(N->getValueType(0), Attr))
6270returnSDValue(N, 0);// Lower SDIV as SDIV
6271returnSDValue();
6272}
6273
6274SDValue
6275TargetLowering::BuildSREMPow2(SDNode *N,constAPInt &Divisor,
6276SelectionDAG &DAG,
6277SmallVectorImpl<SDNode *> &Created) const{
6278AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6279if (isIntDivCheap(N->getValueType(0), Attr))
6280returnSDValue(N, 0);// Lower SREM as SREM
6281returnSDValue();
6282}
6283
6284/// Build sdiv by power-of-2 with conditional move instructions
6285/// Ref: "Hacker's Delight" by Henry Warren 10-1
6286/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6287/// bgez x, label
6288/// add x, x, 2**k-1
6289/// label:
6290/// sra res, x, k
6291/// neg res, res (when the divisor is negative)
6292SDValueTargetLowering::buildSDIVPow2WithCMov(
6293SDNode *N,constAPInt &Divisor,SelectionDAG &DAG,
6294SmallVectorImpl<SDNode *> &Created) const{
6295unsigned Lg2 = Divisor.countr_zero();
6296EVT VT =N->getValueType(0);
6297
6298SDLocDL(N);
6299SDValue N0 =N->getOperand(0);
6300SDValue Zero = DAG.getConstant(0,DL, VT);
6301APInt Lg2Mask =APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6302SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask,DL, VT);
6303
6304// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6305EVT CCVT =getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6306SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero,ISD::SETLT);
6307SDValueAdd = DAG.getNode(ISD::ADD,DL, VT, N0, Pow2MinusOne);
6308SDValue CMov = DAG.getNode(ISD::SELECT,DL, VT, Cmp,Add, N0);
6309
6310 Created.push_back(Cmp.getNode());
6311 Created.push_back(Add.getNode());
6312 Created.push_back(CMov.getNode());
6313
6314// Divide by pow2.
6315SDValue SRA =
6316 DAG.getNode(ISD::SRA,DL, VT, CMov, DAG.getConstant(Lg2,DL, VT));
6317
6318// If we're dividing by a positive value, we're done. Otherwise, we must
6319// negate the result.
6320if (Divisor.isNonNegative())
6321return SRA;
6322
6323 Created.push_back(SRA.getNode());
6324return DAG.getNode(ISD::SUB,DL, VT, Zero, SRA);
6325}
6326
6327/// Given an ISD::SDIV node expressing a divide by constant,
6328/// return a DAG expression to select that will generate the same value by
6329/// multiplying by a magic number.
6330/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6331SDValueTargetLowering::BuildSDIV(SDNode *N,SelectionDAG &DAG,
6332bool IsAfterLegalization,
6333bool IsAfterLegalTypes,
6334SmallVectorImpl<SDNode *> &Created) const{
6335SDLoc dl(N);
6336EVT VT =N->getValueType(0);
6337EVT SVT = VT.getScalarType();
6338EVT ShVT =getShiftAmountTy(VT, DAG.getDataLayout());
6339EVT ShSVT = ShVT.getScalarType();
6340unsigned EltBits = VT.getScalarSizeInBits();
6341EVT MulVT;
6342
6343// Check to see if we can do this.
6344// FIXME: We should be more aggressive here.
6345if (!isTypeLegal(VT)) {
6346// Limit this to simple scalars for now.
6347if (VT.isVector() || !VT.isSimple())
6348returnSDValue();
6349
6350// If this type will be promoted to a large enough type with a legal
6351// multiply operation, we can go ahead and do this transform.
6352if (getTypeAction(VT.getSimpleVT()) !=TypePromoteInteger)
6353returnSDValue();
6354
6355 MulVT =getTypeToTransformTo(*DAG.getContext(), VT);
6356if (MulVT.getSizeInBits() < (2 * EltBits) ||
6357 !isOperationLegal(ISD::MUL, MulVT))
6358returnSDValue();
6359 }
6360
6361// If the sdiv has an 'exact' bit we can use a simpler lowering.
6362if (N->getFlags().hasExact())
6363returnBuildExactSDIV(*this,N, dl, DAG, Created);
6364
6365SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6366
6367auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6368if (C->isZero())
6369returnfalse;
6370
6371constAPInt &Divisor =C->getAPIntValue();
6372SignedDivisionByConstantInfo magics =SignedDivisionByConstantInfo::get(Divisor);
6373int NumeratorFactor = 0;
6374int ShiftMask = -1;
6375
6376if (Divisor.isOne() || Divisor.isAllOnes()) {
6377// If d is +1/-1, we just multiply the numerator by +1/-1.
6378 NumeratorFactor = Divisor.getSExtValue();
6379 magics.Magic = 0;
6380 magics.ShiftAmount = 0;
6381 ShiftMask = 0;
6382 }elseif (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6383// If d > 0 and m < 0, add the numerator.
6384 NumeratorFactor = 1;
6385 }elseif (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6386// If d < 0 and m > 0, subtract the numerator.
6387 NumeratorFactor = -1;
6388 }
6389
6390 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6391 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6392 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6393 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6394returntrue;
6395 };
6396
6397SDValue N0 =N->getOperand(0);
6398SDValue N1 =N->getOperand(1);
6399
6400// Collect the shifts / magic values from each element.
6401if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6402returnSDValue();
6403
6404SDValue MagicFactor, Factor, Shift, ShiftMask;
6405if (N1.getOpcode() ==ISD::BUILD_VECTOR) {
6406 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6407 Factor = DAG.getBuildVector(VT, dl, Factors);
6408 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6409 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6410 }elseif (N1.getOpcode() ==ISD::SPLAT_VECTOR) {
6411assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6412 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6413"Expected matchUnaryPredicate to return one element for scalable "
6414"vectors");
6415 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6416 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6417 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6418 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6419 }else {
6420assert(isa<ConstantSDNode>(N1) &&"Expected a constant");
6421 MagicFactor = MagicFactors[0];
6422 Factor = Factors[0];
6423 Shift = Shifts[0];
6424 ShiftMask = ShiftMasks[0];
6425 }
6426
6427// Multiply the numerator (operand 0) by the magic value.
6428// FIXME: We should support doing a MUL in a wider type.
6429auto GetMULHS = [&](SDValueX,SDValueY) {
6430// If the type isn't legal, use a wider mul of the type calculated
6431// earlier.
6432if (!isTypeLegal(VT)) {
6433X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT,X);
6434Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT,Y);
6435Y = DAG.getNode(ISD::MUL, dl, MulVT,X,Y);
6436Y = DAG.getNode(ISD::SRL, dl, MulVT,Y,
6437 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6438return DAG.getNode(ISD::TRUNCATE, dl, VT,Y);
6439 }
6440
6441if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6442return DAG.getNode(ISD::MULHS, dl, VT,X,Y);
6443if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6444SDValue LoHi =
6445 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),X,Y);
6446returnSDValue(LoHi.getNode(), 1);
6447 }
6448// If type twice as wide legal, widen and use a mul plus a shift.
6449unsignedSize = VT.getScalarSizeInBits();
6450EVT WideVT =EVT::getIntegerVT(*DAG.getContext(),Size * 2);
6451if (VT.isVector())
6452 WideVT =EVT::getVectorVT(*DAG.getContext(), WideVT,
6453 VT.getVectorElementCount());
6454// Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6455// custom lowered. This is very expensive so avoid it at all costs for
6456// constant divisors.
6457if ((!IsAfterLegalTypes &&isOperationExpand(ISD::SDIV, VT) &&
6458isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6459isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6460X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT,X);
6461Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT,Y);
6462Y = DAG.getNode(ISD::MUL, dl, WideVT,X,Y);
6463Y = DAG.getNode(ISD::SRL, dl, WideVT,Y,
6464 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6465return DAG.getNode(ISD::TRUNCATE, dl, VT,Y);
6466 }
6467returnSDValue();
6468 };
6469
6470SDValue Q = GetMULHS(N0, MagicFactor);
6471if (!Q)
6472returnSDValue();
6473
6474 Created.push_back(Q.getNode());
6475
6476// (Optionally) Add/subtract the numerator using Factor.
6477 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6478 Created.push_back(Factor.getNode());
6479 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6480 Created.push_back(Q.getNode());
6481
6482// Shift right algebraic by shift value.
6483 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6484 Created.push_back(Q.getNode());
6485
6486// Extract the sign bit, mask it and add it to the quotient.
6487SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6488SDValueT = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6489 Created.push_back(T.getNode());
6490T = DAG.getNode(ISD::AND, dl, VT,T, ShiftMask);
6491 Created.push_back(T.getNode());
6492return DAG.getNode(ISD::ADD, dl, VT, Q,T);
6493}
6494
6495/// Given an ISD::UDIV node expressing a divide by constant,
6496/// return a DAG expression to select that will generate the same value by
6497/// multiplying by a magic number.
6498/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6499SDValueTargetLowering::BuildUDIV(SDNode *N,SelectionDAG &DAG,
6500bool IsAfterLegalization,
6501bool IsAfterLegalTypes,
6502SmallVectorImpl<SDNode *> &Created) const{
6503SDLoc dl(N);
6504EVT VT =N->getValueType(0);
6505EVT SVT = VT.getScalarType();
6506EVT ShVT =getShiftAmountTy(VT, DAG.getDataLayout());
6507EVT ShSVT = ShVT.getScalarType();
6508unsigned EltBits = VT.getScalarSizeInBits();
6509EVT MulVT;
6510
6511// Check to see if we can do this.
6512// FIXME: We should be more aggressive here.
6513if (!isTypeLegal(VT)) {
6514// Limit this to simple scalars for now.
6515if (VT.isVector() || !VT.isSimple())
6516returnSDValue();
6517
6518// If this type will be promoted to a large enough type with a legal
6519// multiply operation, we can go ahead and do this transform.
6520if (getTypeAction(VT.getSimpleVT()) !=TypePromoteInteger)
6521returnSDValue();
6522
6523 MulVT =getTypeToTransformTo(*DAG.getContext(), VT);
6524if (MulVT.getSizeInBits() < (2 * EltBits) ||
6525 !isOperationLegal(ISD::MUL, MulVT))
6526returnSDValue();
6527 }
6528
6529// If the udiv has an 'exact' bit we can use a simpler lowering.
6530if (N->getFlags().hasExact())
6531returnBuildExactUDIV(*this,N, dl, DAG, Created);
6532
6533SDValue N0 =N->getOperand(0);
6534SDValue N1 =N->getOperand(1);
6535
6536// Try to use leading zeros of the dividend to reduce the multiplier and
6537// avoid expensive fixups.
6538unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6539
6540bool UseNPQ =false, UsePreShift =false, UsePostShift =false;
6541SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6542
6543auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6544if (C->isZero())
6545returnfalse;
6546constAPInt& Divisor =C->getAPIntValue();
6547
6548SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6549
6550// Magic algorithm doesn't work for division by 1. We need to emit a select
6551// at the end.
6552if (Divisor.isOne()) {
6553 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6554 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6555 }else {
6556UnsignedDivisionByConstantInfo magics =
6557UnsignedDivisionByConstantInfo::get(
6558 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6559
6560 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6561
6562assert(magics.PreShift < Divisor.getBitWidth() &&
6563"We shouldn't generate an undefined shift!");
6564assert(magics.PostShift < Divisor.getBitWidth() &&
6565"We shouldn't generate an undefined shift!");
6566assert((!magics.IsAdd || magics.PreShift == 0) &&
6567"Unexpected pre-shift");
6568 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6569 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6570 NPQFactor = DAG.getConstant(
6571 magics.IsAdd ?APInt::getOneBitSet(EltBits, EltBits - 1)
6572 :APInt::getZero(EltBits),
6573 dl, SVT);
6574 UseNPQ |= magics.IsAdd;
6575 UsePreShift |= magics.PreShift != 0;
6576 UsePostShift |= magics.PostShift != 0;
6577 }
6578
6579 PreShifts.push_back(PreShift);
6580 MagicFactors.push_back(MagicFactor);
6581 NPQFactors.push_back(NPQFactor);
6582 PostShifts.push_back(PostShift);
6583returntrue;
6584 };
6585
6586// Collect the shifts/magic values from each element.
6587if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6588returnSDValue();
6589
6590SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6591if (N1.getOpcode() ==ISD::BUILD_VECTOR) {
6592 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6593 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6594 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6595 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6596 }elseif (N1.getOpcode() ==ISD::SPLAT_VECTOR) {
6597assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6598 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6599"Expected matchUnaryPredicate to return one for scalable vectors");
6600 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6601 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6602 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6603 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6604 }else {
6605assert(isa<ConstantSDNode>(N1) &&"Expected a constant");
6606 PreShift = PreShifts[0];
6607 MagicFactor = MagicFactors[0];
6608 PostShift = PostShifts[0];
6609 }
6610
6611SDValue Q = N0;
6612if (UsePreShift) {
6613 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6614 Created.push_back(Q.getNode());
6615 }
6616
6617// FIXME: We should support doing a MUL in a wider type.
6618auto GetMULHU = [&](SDValueX,SDValueY) {
6619// If the type isn't legal, use a wider mul of the type calculated
6620// earlier.
6621if (!isTypeLegal(VT)) {
6622X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT,X);
6623Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT,Y);
6624Y = DAG.getNode(ISD::MUL, dl, MulVT,X,Y);
6625Y = DAG.getNode(ISD::SRL, dl, MulVT,Y,
6626 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6627return DAG.getNode(ISD::TRUNCATE, dl, VT,Y);
6628 }
6629
6630if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6631return DAG.getNode(ISD::MULHU, dl, VT,X,Y);
6632if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6633SDValue LoHi =
6634 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),X,Y);
6635returnSDValue(LoHi.getNode(), 1);
6636 }
6637// If type twice as wide legal, widen and use a mul plus a shift.
6638unsignedSize = VT.getScalarSizeInBits();
6639EVT WideVT =EVT::getIntegerVT(*DAG.getContext(),Size * 2);
6640if (VT.isVector())
6641 WideVT =EVT::getVectorVT(*DAG.getContext(), WideVT,
6642 VT.getVectorElementCount());
6643// Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6644// custom lowered. This is very expensive so avoid it at all costs for
6645// constant divisors.
6646if ((!IsAfterLegalTypes &&isOperationExpand(ISD::UDIV, VT) &&
6647isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
6648isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6649X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT,X);
6650Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT,Y);
6651Y = DAG.getNode(ISD::MUL, dl, WideVT,X,Y);
6652Y = DAG.getNode(ISD::SRL, dl, WideVT,Y,
6653 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6654return DAG.getNode(ISD::TRUNCATE, dl, VT,Y);
6655 }
6656returnSDValue();// No mulhu or equivalent
6657 };
6658
6659// Multiply the numerator (operand 0) by the magic value.
6660 Q = GetMULHU(Q, MagicFactor);
6661if (!Q)
6662returnSDValue();
6663
6664 Created.push_back(Q.getNode());
6665
6666if (UseNPQ) {
6667SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6668 Created.push_back(NPQ.getNode());
6669
6670// For vectors we might have a mix of non-NPQ/NPQ paths, so use
6671// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6672if (VT.isVector())
6673 NPQ = GetMULHU(NPQ, NPQFactor);
6674else
6675 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6676
6677 Created.push_back(NPQ.getNode());
6678
6679 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6680 Created.push_back(Q.getNode());
6681 }
6682
6683if (UsePostShift) {
6684 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6685 Created.push_back(Q.getNode());
6686 }
6687
6688EVT SetCCVT =getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6689
6690SDValue One = DAG.getConstant(1, dl, VT);
6691SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One,ISD::SETEQ);
6692return DAG.getSelect(dl, VT, IsOne, N0, Q);
6693}
6694
6695/// If all values in Values that *don't* match the predicate are same 'splat'
6696/// value, then replace all values with that splat value.
6697/// Else, if AlternativeReplacement was provided, then replace all values that
6698/// do match predicate with AlternativeReplacement value.
6699staticvoid
6700turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6701 std::function<bool(SDValue)>Predicate,
6702SDValue AlternativeReplacement =SDValue()) {
6703SDValue Replacement;
6704// Is there a value for which the Predicate does *NOT* match? What is it?
6705auto SplatValue =llvm::find_if_not(Values,Predicate);
6706if (SplatValue != Values.end()) {
6707// Does Values consist only of SplatValue's and values matching Predicate?
6708if (llvm::all_of(Values, [Predicate, SplatValue](SDValueValue) {
6709returnValue == *SplatValue ||Predicate(Value);
6710 }))// Then we shall replace values matching predicate with SplatValue.
6711 Replacement = *SplatValue;
6712 }
6713if (!Replacement) {
6714// Oops, we did not find the "baseline" splat value.
6715if (!AlternativeReplacement)
6716return;// Nothing to do.
6717// Let's replace with provided value then.
6718 Replacement = AlternativeReplacement;
6719 }
6720 std::replace_if(Values.begin(), Values.end(),Predicate, Replacement);
6721}
6722
6723/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6724/// where the divisor is constant and the comparison target is zero,
6725/// return a DAG expression that will generate the same comparison result
6726/// using only multiplications, additions and shifts/rotations.
6727/// Ref: "Hacker's Delight" 10-17.
6728SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT,SDValue REMNode,
6729SDValue CompTargetNode,
6730ISD::CondCodeCond,
6731 DAGCombinerInfo &DCI,
6732constSDLoc &DL) const{
6733SmallVector<SDNode *, 5> Built;
6734if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode,Cond,
6735 DCI,DL, Built)) {
6736for (SDNode *N : Built)
6737 DCI.AddToWorklist(N);
6738return Folded;
6739 }
6740
6741returnSDValue();
6742}
6743
6744SDValue
6745TargetLowering::prepareUREMEqFold(EVT SETCCVT,SDValue REMNode,
6746SDValue CompTargetNode,ISD::CondCodeCond,
6747 DAGCombinerInfo &DCI,constSDLoc &DL,
6748SmallVectorImpl<SDNode *> &Created) const{
6749// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6750// - D must be constant, with D = D0 * 2^K where D0 is odd
6751// - P is the multiplicative inverse of D0 modulo 2^W
6752// - Q = floor(((2^W) - 1) / D)
6753// where W is the width of the common type of N and D.
6754assert((Cond ==ISD::SETEQ ||Cond ==ISD::SETNE) &&
6755"Only applicable for (in)equality comparisons.");
6756
6757SelectionDAG &DAG = DCI.DAG;
6758
6759EVT VT = REMNode.getValueType();
6760EVT SVT = VT.getScalarType();
6761EVT ShVT =getShiftAmountTy(VT, DAG.getDataLayout());
6762EVT ShSVT = ShVT.getScalarType();
6763
6764// If MUL is unavailable, we cannot proceed in any case.
6765if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6766returnSDValue();
6767
6768bool ComparingWithAllZeros =true;
6769bool AllComparisonsWithNonZerosAreTautological =true;
6770bool HadTautologicalLanes =false;
6771bool AllLanesAreTautological =true;
6772bool HadEvenDivisor =false;
6773bool AllDivisorsArePowerOfTwo =true;
6774bool HadTautologicalInvertedLanes =false;
6775SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6776
6777auto BuildUREMPattern = [&](ConstantSDNode *CDiv,ConstantSDNode *CCmp) {
6778// Division by 0 is UB. Leave it to be constant-folded elsewhere.
6779if (CDiv->isZero())
6780returnfalse;
6781
6782constAPInt &D = CDiv->getAPIntValue();
6783constAPInt &Cmp = CCmp->getAPIntValue();
6784
6785 ComparingWithAllZeros &=Cmp.isZero();
6786
6787// x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6788// if C2 is not less than C1, the comparison is always false.
6789// But we will only be able to produce the comparison that will give the
6790// opposive tautological answer. So this lane would need to be fixed up.
6791bool TautologicalInvertedLane =D.ule(Cmp);
6792 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6793
6794// If all lanes are tautological (either all divisors are ones, or divisor
6795// is not greater than the constant we are comparing with),
6796// we will prefer to avoid the fold.
6797bool TautologicalLane =D.isOne() || TautologicalInvertedLane;
6798 HadTautologicalLanes |= TautologicalLane;
6799 AllLanesAreTautological &= TautologicalLane;
6800
6801// If we are comparing with non-zero, we need'll need to subtract said
6802// comparison value from the LHS. But there is no point in doing that if
6803// every lane where we are comparing with non-zero is tautological..
6804if (!Cmp.isZero())
6805 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6806
6807// Decompose D into D0 * 2^K
6808unsignedK =D.countr_zero();
6809assert((!D.isOne() || (K == 0)) &&"For divisor '1' we won't rotate.");
6810APInt D0 =D.lshr(K);
6811
6812// D is even if it has trailing zeros.
6813 HadEvenDivisor |= (K != 0);
6814// D is a power-of-two if D0 is one.
6815// If all divisors are power-of-two, we will prefer to avoid the fold.
6816 AllDivisorsArePowerOfTwo &= D0.isOne();
6817
6818// P = inv(D0, 2^W)
6819// 2^W requires W + 1 bits, so we have to extend and then truncate.
6820unsignedW =D.getBitWidth();
6821APIntP = D0.multiplicativeInverse();
6822assert((D0 *P).isOne() &&"Multiplicative inverse basic check failed.");
6823
6824// Q = floor((2^W - 1) u/ D)
6825// R = ((2^W - 1) u% D)
6826APInt Q,R;
6827APInt::udivrem(APInt::getAllOnes(W),D, Q, R);
6828
6829// If we are comparing with zero, then that comparison constant is okay,
6830// else it may need to be one less than that.
6831if (Cmp.ugt(R))
6832 Q -= 1;
6833
6834assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6835"We are expecting that K is always less than all-ones for ShSVT");
6836
6837// If the lane is tautological the result can be constant-folded.
6838if (TautologicalLane) {
6839// Set P and K amount to a bogus values so we can try to splat them.
6840P = 0;
6841K = -1;
6842// And ensure that comparison constant is tautological,
6843// it will always compare true/false.
6844 Q = -1;
6845 }
6846
6847 PAmts.push_back(DAG.getConstant(P,DL, SVT));
6848 KAmts.push_back(
6849 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K,/*isSigned=*/false,
6850/*implicitTrunc=*/true),
6851DL, ShSVT));
6852 QAmts.push_back(DAG.getConstant(Q,DL, SVT));
6853returntrue;
6854 };
6855
6856SDValueN = REMNode.getOperand(0);
6857SDValueD = REMNode.getOperand(1);
6858
6859// Collect the values from each element.
6860if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6861returnSDValue();
6862
6863// If all lanes are tautological, the result can be constant-folded.
6864if (AllLanesAreTautological)
6865returnSDValue();
6866
6867// If this is a urem by a powers-of-two, avoid the fold since it can be
6868// best implemented as a bit test.
6869if (AllDivisorsArePowerOfTwo)
6870returnSDValue();
6871
6872SDValue PVal, KVal, QVal;
6873if (D.getOpcode() ==ISD::BUILD_VECTOR) {
6874if (HadTautologicalLanes) {
6875// Try to turn PAmts into a splat, since we don't care about the values
6876// that are currently '0'. If we can't, just keep '0'`s.
6877turnVectorIntoSplatVector(PAmts,isNullConstant);
6878// Try to turn KAmts into a splat, since we don't care about the values
6879// that are currently '-1'. If we can't, change them to '0'`s.
6880turnVectorIntoSplatVector(KAmts,isAllOnesConstant,
6881 DAG.getConstant(0,DL, ShSVT));
6882 }
6883
6884 PVal = DAG.getBuildVector(VT,DL, PAmts);
6885 KVal = DAG.getBuildVector(ShVT,DL, KAmts);
6886 QVal = DAG.getBuildVector(VT,DL, QAmts);
6887 }elseif (D.getOpcode() ==ISD::SPLAT_VECTOR) {
6888assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6889"Expected matchBinaryPredicate to return one element for "
6890"SPLAT_VECTORs");
6891 PVal = DAG.getSplatVector(VT,DL, PAmts[0]);
6892 KVal = DAG.getSplatVector(ShVT,DL, KAmts[0]);
6893 QVal = DAG.getSplatVector(VT,DL, QAmts[0]);
6894 }else {
6895 PVal = PAmts[0];
6896 KVal = KAmts[0];
6897 QVal = QAmts[0];
6898 }
6899
6900if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6901if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6902returnSDValue();// FIXME: Could/should use `ISD::ADD`?
6903assert(CompTargetNode.getValueType() ==N.getValueType() &&
6904"Expecting that the types on LHS and RHS of comparisons match.");
6905N = DAG.getNode(ISD::SUB,DL, VT,N, CompTargetNode);
6906 }
6907
6908// (mul N, P)
6909SDValue Op0 = DAG.getNode(ISD::MUL,DL, VT,N, PVal);
6910 Created.push_back(Op0.getNode());
6911
6912// Rotate right only if any divisor was even. We avoid rotates for all-odd
6913// divisors as a performance improvement, since rotating by 0 is a no-op.
6914if (HadEvenDivisor) {
6915// We need ROTR to do this.
6916if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6917returnSDValue();
6918// UREM: (rotr (mul N, P), K)
6919 Op0 = DAG.getNode(ISD::ROTR,DL, VT, Op0, KVal);
6920 Created.push_back(Op0.getNode());
6921 }
6922
6923// UREM: (setule/setugt (rotr (mul N, P), K), Q)
6924SDValue NewCC =
6925 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6926 ((Cond ==ISD::SETEQ) ?ISD::SETULE :ISD::SETUGT));
6927if (!HadTautologicalInvertedLanes)
6928return NewCC;
6929
6930// If any lanes previously compared always-false, the NewCC will give
6931// always-true result for them, so we need to fixup those lanes.
6932// Or the other way around for inequality predicate.
6933assert(VT.isVector() &&"Can/should only get here for vectors.");
6934 Created.push_back(NewCC.getNode());
6935
6936// x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6937// if C2 is not less than C1, the comparison is always false.
6938// But we have produced the comparison that will give the
6939// opposive tautological answer. So these lanes would need to be fixed up.
6940SDValue TautologicalInvertedChannels =
6941 DAG.getSetCC(DL, SETCCVT,D, CompTargetNode,ISD::SETULE);
6942 Created.push_back(TautologicalInvertedChannels.getNode());
6943
6944// NOTE: we avoid letting illegal types through even if we're before legalize
6945// ops – legalization has a hard time producing good code for this.
6946if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6947// If we have a vector select, let's replace the comparison results in the
6948// affected lanes with the correct tautological result.
6949SDValue Replacement = DAG.getBoolConstant(Cond ==ISD::SETEQ ?false :true,
6950DL, SETCCVT, SETCCVT);
6951return DAG.getNode(ISD::VSELECT,DL, SETCCVT, TautologicalInvertedChannels,
6952 Replacement, NewCC);
6953 }
6954
6955// Else, we can just invert the comparison result in the appropriate lanes.
6956//
6957// NOTE: see the note above VSELECT above.
6958if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6959return DAG.getNode(ISD::XOR,DL, SETCCVT, NewCC,
6960 TautologicalInvertedChannels);
6961
6962returnSDValue();// Don't know how to lower.
6963}
6964
6965/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6966/// where the divisor is constant and the comparison target is zero,
6967/// return a DAG expression that will generate the same comparison result
6968/// using only multiplications, additions and shifts/rotations.
6969/// Ref: "Hacker's Delight" 10-17.
6970SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT,SDValue REMNode,
6971SDValue CompTargetNode,
6972ISD::CondCodeCond,
6973 DAGCombinerInfo &DCI,
6974constSDLoc &DL) const{
6975SmallVector<SDNode *, 7> Built;
6976if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode,Cond,
6977 DCI,DL, Built)) {
6978assert(Built.size() <= 7 &&"Max size prediction failed.");
6979for (SDNode *N : Built)
6980 DCI.AddToWorklist(N);
6981return Folded;
6982 }
6983
6984returnSDValue();
6985}
6986
/// Worker for buildSREMEqFold: builds the actual SREM ==/!= 0 replacement
/// expression, recording every node it creates in \p Created so the caller
/// can feed them back to the DAG combiner. Returns an empty SDValue if the
/// fold is not profitable or not legal in the current legalization phase.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  //
  // When D is a power of two (and thus D0 is 1), the normal
  // formula for A and Q don't apply, because the derivation
  // depends on D not dividing 2^(W-1), and thus theorem ZRS
  // does not apply. This specifically fails when N = INT_MIN.
  //
  // Instead, for power-of-two D, we use:
  // - A = 2^(W-1)
  //   |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
  // - Q = 2^(W-K) - 1
  //   |-> Test that the top K bits are zero after rotation
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Per-lane constant builder; returns false to abort the whole fold.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`

    // INT_MIN negates to itself; such lanes get a separate fix-up below.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If D was a power of two, apply the alternate constant derivation.
    if (D0.isOne()) {
      // A = 2^(W-1)
      A = APInt::getSignedMinValue(W);
      // - Q = 2^(W-K) - 1
      Q = APInt::getAllOnes(W - K).zext(W);
    }

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                              /*implicitTrunc=*/true),
                        DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
7258
7259boolTargetLowering::
7260verifyReturnAddressArgumentIsConstant(SDValueOp,SelectionDAG &DAG) const{
7261if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7262 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7263"be a constant integer");
7264returntrue;
7265 }
7266
7267returnfalse;
7268}
7269
7270SDValueTargetLowering::getSqrtInputTest(SDValueOp,SelectionDAG &DAG,
7271constDenormalMode &Mode) const{
7272SDLocDL(Op);
7273EVT VT =Op.getValueType();
7274EVT CCVT =getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7275SDValue FPZero = DAG.getConstantFP(0.0,DL, VT);
7276
7277// This is specifically a check for the handling of denormal inputs, not the
7278// result.
7279if (Mode.Input ==DenormalMode::PreserveSign ||
7280 Mode.Input ==DenormalMode::PositiveZero) {
7281// Test = X == 0.0
7282return DAG.getSetCC(DL, CCVT,Op, FPZero,ISD::SETEQ);
7283 }
7284
7285// Testing it with denormal inputs to avoid wrong estimate.
7286//
7287// Test = fabs(X) < SmallestNormal
7288constfltSemantics &FltSem = VT.getFltSemantics();
7289APFloat SmallestNorm =APFloat::getSmallestNormalized(FltSem);
7290SDValue NormC = DAG.getConstantFP(SmallestNorm,DL, VT);
7291SDValue Fabs = DAG.getNode(ISD::FABS,DL, VT,Op);
7292return DAG.getSetCC(DL, CCVT, Fabs, NormC,ISD::SETLT);
7293}
7294
/// Try to rewrite \p Op as its negation without emitting an explicit FNEG.
/// On success returns the negated expression and sets \p Cost to how the
/// result compares to negating via FNEG (Cheaper/Neutral); returns an empty
/// SDValue when no profitable negation is found within the recursion budget.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Drop a speculatively-negated subexpression once we know it lost the
  // cost comparison and nothing else uses it.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Negate each (non-undef) element of the vector constant.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // -(X + Y) -> (-X) - Y / (-Y) - X is only valid without signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These pass negation straight through their single operand.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7598
7599//===----------------------------------------------------------------------===//
7600// Legalization Utilities
7601//===----------------------------------------------------------------------===//
7602
7603boolTargetLowering::expandMUL_LOHI(unsigned Opcode,EVT VT,constSDLoc &dl,
7604SDValue LHS,SDValue RHS,
7605SmallVectorImpl<SDValue> &Result,
7606EVT HiLoVT,SelectionDAG &DAG,
7607MulExpansionKind Kind,SDValue LL,
7608SDValue LH,SDValue RL,SDValue RH) const{
7609assert(Opcode ==ISD::MUL || Opcode ==ISD::UMUL_LOHI ||
7610 Opcode ==ISD::SMUL_LOHI);
7611
7612bool HasMULHS = (Kind ==MulExpansionKind::Always) ||
7613isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7614bool HasMULHU = (Kind ==MulExpansionKind::Always) ||
7615isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7616bool HasSMUL_LOHI = (Kind ==MulExpansionKind::Always) ||
7617isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7618bool HasUMUL_LOHI = (Kind ==MulExpansionKind::Always) ||
7619isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7620
7621if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7622returnfalse;
7623
7624unsigned OuterBitSize = VT.getScalarSizeInBits();
7625unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7626
7627// LL, LH, RL, and RH must be either all NULL or all set to a value.
7628assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7629 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7630
7631SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7632auto MakeMUL_LOHI = [&](SDValue L,SDValue R,SDValue &Lo,SDValue &Hi,
7633boolSigned) ->bool {
7634if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7635Lo = DAG.getNode(Signed ?ISD::SMUL_LOHI :ISD::UMUL_LOHI, dl, VTs, L, R);
7636Hi =SDValue(Lo.getNode(), 1);
7637returntrue;
7638 }
7639if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7640Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7641Hi = DAG.getNode(Signed ?ISD::MULHS :ISD::MULHU, dl, HiLoVT, L, R);
7642returntrue;
7643 }
7644returnfalse;
7645 };
7646
7647SDValueLo,Hi;
7648
7649if (!LL.getNode() && !RL.getNode() &&
7650isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7651 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT,LHS);
7652 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT,RHS);
7653 }
7654
7655if (!LL.getNode())
7656returnfalse;
7657
7658APInt HighMask =APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7659if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7660 DAG.MaskedValueIsZero(RHS, HighMask)) {
7661// The inputs are both zero-extended.
7662if (MakeMUL_LOHI(LL, RL,Lo,Hi,false)) {
7663 Result.push_back(Lo);
7664 Result.push_back(Hi);
7665if (Opcode !=ISD::MUL) {
7666SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7667 Result.push_back(Zero);
7668 Result.push_back(Zero);
7669 }
7670returntrue;
7671 }
7672 }
7673
7674if (!VT.isVector() && Opcode ==ISD::MUL &&
7675 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7676 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7677// The input values are both sign-extended.
7678// TODO non-MUL case?
7679if (MakeMUL_LOHI(LL, RL,Lo,Hi,true)) {
7680 Result.push_back(Lo);
7681 Result.push_back(Hi);
7682returntrue;
7683 }
7684 }
7685
7686unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7687SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7688
7689if (!LH.getNode() && !RH.getNode() &&
7690isOperationLegalOrCustom(ISD::SRL, VT) &&
7691isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7692 LH = DAG.getNode(ISD::SRL, dl, VT,LHS, Shift);
7693 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7694 RH = DAG.getNode(ISD::SRL, dl, VT,RHS, Shift);
7695 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7696 }
7697
7698if (!LH.getNode())
7699returnfalse;
7700
7701if (!MakeMUL_LOHI(LL, RL,Lo,Hi,false))
7702returnfalse;
7703
7704 Result.push_back(Lo);
7705
7706if (Opcode ==ISD::MUL) {
7707 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7708 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7709Hi = DAG.getNode(ISD::ADD, dl, HiLoVT,Hi, RH);
7710Hi = DAG.getNode(ISD::ADD, dl, HiLoVT,Hi, LH);
7711 Result.push_back(Hi);
7712returntrue;
7713 }
7714
7715// Compute the full width result.
7716autoMerge = [&](SDValueLo,SDValueHi) ->SDValue {
7717Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT,Lo);
7718Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT,Hi);
7719Hi = DAG.getNode(ISD::SHL, dl, VT,Hi, Shift);
7720return DAG.getNode(ISD::OR, dl, VT,Lo,Hi);
7721 };
7722
7723SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT,Hi);
7724if (!MakeMUL_LOHI(LL, RH,Lo,Hi,false))
7725returnfalse;
7726
7727// This is effectively the add part of a multiply-add of half-sized operands,
7728// so it cannot overflow.
7729 Next = DAG.getNode(ISD::ADD, dl, VT, Next,Merge(Lo,Hi));
7730
7731if (!MakeMUL_LOHI(LH, RL,Lo,Hi,false))
7732returnfalse;
7733
7734SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7735EVT BoolType =getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7736
7737bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7738isOperationLegalOrCustom(ISD::ADDE, VT));
7739if (UseGlue)
7740 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7741Merge(Lo,Hi));
7742else
7743 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7744Merge(Lo,Hi), DAG.getConstant(0, dl, BoolType));
7745
7746SDValue Carry = Next.getValue(1);
7747 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7748 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7749
7750if (!MakeMUL_LOHI(LH, RH,Lo,Hi, Opcode ==ISD::SMUL_LOHI))
7751returnfalse;
7752
7753if (UseGlue)
7754Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue),Hi, Zero,
7755 Carry);
7756else
7757Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType),Hi,
7758 Zero, Carry);
7759
7760 Next = DAG.getNode(ISD::ADD, dl, VT, Next,Merge(Lo,Hi));
7761
7762if (Opcode ==ISD::SMUL_LOHI) {
7763SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7764 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7765 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next,ISD::SETLT);
7766
7767 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7768 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7769 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next,ISD::SETLT);
7770 }
7771
7772 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7773 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7774 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7775returntrue;
7776}
7777
7778boolTargetLowering::expandMUL(SDNode *N,SDValue &Lo,SDValue &Hi,EVT HiLoVT,
7779SelectionDAG &DAG,MulExpansionKind Kind,
7780SDValue LL,SDValue LH,SDValue RL,
7781SDValue RH) const{
7782SmallVector<SDValue, 2> Result;
7783bool Ok = expandMUL_LOHI(N->getOpcode(),N->getValueType(0),SDLoc(N),
7784N->getOperand(0),N->getOperand(1), Result, HiLoVT,
7785 DAG, Kind, LL, LH, RL, RH);
7786if (Ok) {
7787assert(Result.size() == 2);
7788Lo = Result[0];
7789Hi = Result[1];
7790 }
7791return Ok;
7792}
7793
7794// Optimize unsigned division or remainder by constants for types twice as large
7795// as a legal VT.
7796//
7797// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7798// can be computed
7799// as:
7800// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7801// Remainder = Sum % Constant
7802// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7803//
7804// For division, we can compute the remainder using the algorithm described
7805// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << (BitWidth / 2)) to get the quotient.
7808
7809// If Constant is even, we can shift right the dividend and the divisor by the
7810// number of trailing zeros in Constant before applying the remainder algorithm.
7811// If we're after the quotient, we can subtract this value from the shifted
7812// dividend and multiply by the multiplicative inverse of the shifted divisor.
7813// If we want the remainder, we shift the value left by the number of trailing
7814// zeros and add the bits that were shifted out of the dividend.
7815boolTargetLowering::expandDIVREMByConstant(SDNode *N,
7816SmallVectorImpl<SDValue> &Result,
7817EVT HiLoVT,SelectionDAG &DAG,
7818SDValue LL,SDValue LH) const{
7819unsigned Opcode =N->getOpcode();
7820EVT VT =N->getValueType(0);
7821
7822// TODO: Support signed division/remainder.
7823if (Opcode ==ISD::SREM || Opcode ==ISD::SDIV || Opcode ==ISD::SDIVREM)
7824returnfalse;
7825assert(
7826 (Opcode ==ISD::UREM || Opcode ==ISD::UDIV || Opcode ==ISD::UDIVREM) &&
7827"Unexpected opcode");
7828
7829auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7830if (!CN)
7831returnfalse;
7832
7833APInt Divisor = CN->getAPIntValue();
7834unsignedBitWidth = Divisor.getBitWidth();
7835unsigned HBitWidth =BitWidth / 2;
7836assert(VT.getScalarSizeInBits() ==BitWidth &&
7837 HiLoVT.getScalarSizeInBits() == HBitWidth &&"Unexpected VTs");
7838
7839// Divisor needs to less than (1 << HBitWidth).
7840APInt HalfMaxPlus1 =APInt::getOneBitSet(BitWidth, HBitWidth);
7841if (Divisor.uge(HalfMaxPlus1))
7842returnfalse;
7843
7844// We depend on the UREM by constant optimization in DAGCombiner that requires
7845// high multiply.
7846if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7847 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7848returnfalse;
7849
7850// Don't expand if optimizing for size.
7851if (DAG.shouldOptForSize())
7852returnfalse;
7853
7854// Early out for 0 or 1 divisors.
7855if (Divisor.ule(1))
7856returnfalse;
7857
7858// If the divisor is even, shift it until it becomes odd.
7859unsigned TrailingZeros = 0;
7860if (!Divisor[0]) {
7861 TrailingZeros = Divisor.countr_zero();
7862 Divisor.lshrInPlace(TrailingZeros);
7863 }
7864
7865SDLoc dl(N);
7866SDValue Sum;
7867SDValue PartialRem;
7868
7869// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7870// then add in the carry.
7871// TODO: If we can't split it in half, we might be able to split into 3 or
7872// more pieces using a smaller bit width.
7873if (HalfMaxPlus1.urem(Divisor).isOne()) {
7874assert(!LL == !LH &&"Expected both input halves or no input halves!");
7875if (!LL)
7876 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7877
7878// Shift the input by the number of TrailingZeros in the divisor. The
7879// shifted out bits will be added to the remainder later.
7880if (TrailingZeros) {
7881// Save the shifted off bits if we need the remainder.
7882if (Opcode !=ISD::UDIV) {
7883APInt Mask =APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7884 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7885 DAG.getConstant(Mask, dl, HiLoVT));
7886 }
7887
7888 LL = DAG.getNode(
7889ISD::OR, dl, HiLoVT,
7890 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7891 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7892 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7893 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7894 HiLoVT, dl)));
7895 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7896 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7897 }
7898
7899// Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7900EVT SetCCType =
7901getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7902if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
7903SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7904 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7905 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7906 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7907 }else {
7908 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7909SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL,ISD::SETULT);
7910// If the boolean for the target is 0 or 1, we can add the setcc result
7911// directly.
7912if (getBooleanContents(HiLoVT) ==
7913TargetLoweringBase::ZeroOrOneBooleanContent)
7914 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7915else
7916 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7917 DAG.getConstant(0, dl, HiLoVT));
7918 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7919 }
7920 }
7921
7922// If we didn't find a sum, we can't do the expansion.
7923if (!Sum)
7924returnfalse;
7925
7926// Perform a HiLoVT urem on the Sum using truncated divisor.
7927SDValue RemL =
7928 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7929 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7930SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7931
7932if (Opcode !=ISD::UREM) {
7933// Subtract the remainder from the shifted dividend.
7934SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7935SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7936
7937 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7938
7939// Multiply by the multiplicative inverse of the divisor modulo
7940// (1 << BitWidth).
7941APInt MulFactor = Divisor.multiplicativeInverse();
7942
7943SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7944 DAG.getConstant(MulFactor, dl, VT));
7945
7946// Split the quotient into low and high parts.
7947SDValue QuotL, QuotH;
7948 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7949 Result.push_back(QuotL);
7950 Result.push_back(QuotH);
7951 }
7952
7953if (Opcode !=ISD::UDIV) {
7954// If we shifted the input, shift the remainder left and add the bits we
7955// shifted off the input.
7956if (TrailingZeros) {
7957APInt Mask =APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7958 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7959 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7960 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7961 }
7962 Result.push_back(RemL);
7963 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7964 }
7965
7966returntrue;
7967}
7968
7969// Check that (every element of) Z is undef or not an exact multiple of BW.
7970staticboolisNonZeroModBitWidthOrUndef(SDValue Z,unsigned BW) {
7971returnISD::matchUnaryPredicate(
7972 Z,
7973 [=](ConstantSDNode *C) {return !C ||C->getAPIntValue().urem(BW) != 0; },
7974true);
7975}
7976
/// Expand a vector-predicated funnel shift (VP_FSHL/VP_FSHR) into
/// VP shift/and/xor/urem/sub nodes combined with VP_OR, threading the
/// mask and vector-length operands through every intermediate node.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);     // shift amount
  SDValue Mask = Node->getOperand(3);  // VP mask
  SDValue VL = Node->getOperand(4);    // VP vector length

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // Z % BW may be zero here, so split the "wide" shift into a constant
    // shift by one plus a shift by (BW - 1 - (Z % BW)) so no individual
    // shift amount can reach BW:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
8033
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  // Vector-predicated funnel shifts take their own expansion path.
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(0);

  // Only expand vector types when the shift/sub/or building blocks are
  // themselves supported for this vector type.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);  // shift amount

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      // NOTE(review): the SUB is built with result type VT while Zero/Z have
      // type ShVT — presumably ShVT == VT on the paths that reach here;
      // confirm against targets that mark the reversed opcode legal.
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // Z % BW may be zero here; the extra constant shift-by-one keeps every
    // individual SHL/SRL amount strictly below BW (never out of range):
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
8121
// TODO: Merge with expandFunnelShift.
/// Expand ROTL/ROTR into shifts, masks and an OR. AllowVectorOps permits
/// emitting the component vector operations even when they are not legal
/// or custom for VT.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);  // rotate amount
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it:
  // rot(l/r) x, c -> rot(r/l) x, -c. Valid because negation is congruent
  // modulo the (power-of-two) bit width.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  // Only expand vector types when the component operations are available
  // (unless the caller explicitly allowed illegal vector ops).
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;  // shift in rotate direction
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;  // opposite-direction shift
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The extra shift-by-one keeps each shift amount strictly below w.
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
8177
/// Expand SHL_PARTS/SRA_PARTS/SRL_PARTS (a double-register shift of the pair
/// Hi:Lo by ShAmt) into FSHL/FSHR plus plain shifts and selects. Lo and Hi
/// receive the two result halves.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // For SRA the "overflow" fill value is the sign-extension of the high part;
  // for SHL/SRL it is zero.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8229
/// Expand FP_TO_SINT by decomposing the float's bit pattern (sign, exponent,
/// mantissa) with integer ops. Currently limited to f32 -> i64. Returns false
/// when the expansion does not apply; on success stores the result in Result.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits at bit 23,
  // bias 127, 23-bit mantissa.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float's bits as an integer.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign = all-zeros or all-ones mask from the sign bit (arithmetic shift).
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // R = mantissa with the implicit leading 1 (0x00800000) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand left or right depending on whether the exponent is
  // above or below the mantissa width.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |value| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8300
/// Expand FP_TO_UINT in terms of FP_TO_SINT: when the source can represent
/// the destination's sign-bit value, conditionally subtract it before the
/// signed conversion and XOR it back in afterwards. Handles both normal and
/// strict-FP nodes; Chain is updated for the strict case.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst holds the sign-mask value (e.g. 2^63) as a float constant.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the sign mask implements the "+ 0x8000000000000000".
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8402
/// Expand UINT_TO_FP (i64 -> f64 only) using the split-halves technique from
/// compiler-rt's __floatundidf. Returns false when the expansion does not
/// apply; on success stores the result in Result.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
    Result =
        DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
    return true;
  }

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  //
  // The idea: OR each 32-bit half of the input into the mantissa of a
  // specially-chosen double (2^52 for the low half, 2^84 for the high half),
  // then subtract the bias constant (2^84 + 2^52) and add the two parts.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);

  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8461
/// Lower FMINNUM/FMAXNUM (and their strict variants) to a select_cc when the
/// no-NaNs flag is present, since without NaNs the IEEE minnum/maxnum
/// semantics reduce to a plain ordered compare+select. Returns an empty
/// SDValue when the flag is absent or the needed vector ops are unavailable.
SDValue
TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
                                               SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
         "Wrong opcode");

  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    EVT VT = Node->getValueType(0);
    // Only vector types are gated on legality here; scalars always proceed.
    if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
         !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
        VT.isVector())
      return SDValue();
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
    return SelCC;
  }

  return SDValue();
}
8488
/// Expand FMINNUM/FMAXNUM by trying, in order: splitting oversized vectors,
/// the IEEE variants (with operand canonicalization to quiet sNaNs),
/// FMINIMUM/FMAXIMUM when NaNs and conflicting zeros are ruled out, and
/// finally a compare+select. Returns an empty SDValue if nothing applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(0);
  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: a plain compare+select (requires the no-NaNs flag).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8544
/// Expand FMINIMUM/FMAXIMUM (IEEE 754-2019 semantics: NaN propagates, and
/// -0.0 orders below +0.0) in terms of available min/max or compare+select
/// nodes, then patch up NaN propagation and signed-zero ordering where the
/// chosen base operation does not already guarantee them.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands: if either input is (possibly) NaN,
  // override the computed min/max with a canonical NaN via an unordered
  // compare of the operands.
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
                                        APFloat::getNaN(VT.getFltSemantics()));
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0: when the base op may
  // return either zero, detect a zero result and pick the operand whose sign
  // matches the wanted extreme (fcPosZero for max, fcNegZero for min).
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8611
/// Expand FMINIMUMNUM/FMAXIMUMNUM (IEEE 754-2019 minimumNumber/maximumNumber:
/// NaN loses to a number, -0.0 orders below +0.0) by delegating to the
/// closest legal min/max node when its semantics provably coincide, or else
/// building the result from selects with explicit NaN and signed-zero fixups.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = Node->getFlags();

  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(LHS)) {
        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(RHS)) {
        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
      }
    }

    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2019Op, VT))
      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
  }

  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
       DAG.isKnownNeverZeroFloat(RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(IEEE2008Op, VT))
      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
  }

  // The select-based fallback below needs VSELECT for vectors.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
  }

  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
  // If MinMax is NaN, let's quiet it.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
      !DAG.isKnownNeverNaN(RHS)) {
    MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
  }

  // Fixup signed zero behavior.
  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
    return MinMax;
  }
  // A zero result is ambiguous between +0.0 and -0.0; prefer the operand
  // whose sign matches the wanted extreme (fcPosZero for max, fcNegZero for
  // min).
  SDValue TestZero =
      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
  SDValue LCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
      MinMax, Flags);
  SDValue RCmp = DAG.getSelect(
      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
      Flags);
  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
}
8699
8700/// Returns a true value if if this FPClassTest can be performed with an ordered
8701/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8702/// std::nullopt if it cannot be performed as a compare with 0.
8703static std::optional<bool>isFCmpEqualZero(FPClassTestTest,
8704constfltSemantics &Semantics,
8705constMachineFunction &MF) {
8706FPClassTest OrderedMask =Test & ~fcNan;
8707FPClassTest NanTest =Test &fcNan;
8708bool IsOrdered = NanTest ==fcNone;
8709bool IsUnordered = NanTest ==fcNan;
8710
8711// Skip cases that are testing for only a qnan or snan.
8712if (!IsOrdered && !IsUnordered)
8713return std::nullopt;
8714
8715if (OrderedMask ==fcZero &&
8716 MF.getDenormalMode(Semantics).Input ==DenormalMode::IEEE)
8717return IsOrdered;
8718if (OrderedMask == (fcZero |fcSubnormal) &&
8719 MF.getDenormalMode(Semantics).inputsAreZero())
8720return IsOrdered;
8721return std::nullopt;
8722}
8723
/// Expand an IS_FPCLASS test of \p Op against class mask \p OrigTestMask into
/// either floating-point compares (when FP exceptions may be ignored) or
/// integer bit tests on the value's raw encoding. Handles ppc_fp128 by
/// classifying its high double, and x86 f80 by accounting for the explicit
/// integer bit.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         const FPClassTest OrigTestMask,
                                         SDNodeFlags Flags, const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());
  FPClassTest Test = OrigTestMask;

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if (Test == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    FPClassTest FPTestMask = Test;
    bool IsInvertedFP = false;

    // Testing the complement mask may need fewer compares; remember the
    // inversion so the condition codes below can be flipped accordingly.
    if (FPClassTest InvertedFPCheck =
            invertFPClassTestIfSimpler(FPTestMask, true)) {
      FPTestMask = InvertedFPCheck;
      IsInvertedFP = true;
    }

    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;

    // See if we can fold an | fcNan into an unordered compare.
    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

    // Can't fold the ordered check if we're only testing for snan or qnan
    // individually.
    if ((FPTestMask & fcNan) != fcNan)
      OrderedFPTestMask = FPTestMask;

    const bool IsOrdered = FPTestMask == OrderedFPTestMask;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered self-compare).
    if (FPTestMask == fcNan &&
        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT()))
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInvertedFP ? ISD::SETO : ISD::SETUO);

    bool IsOrderedInf = FPTestMask == fcInf;
    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
                                             : UnorderedCmpOpcode,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
        (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
         (OperandVT.isVector() &&
          isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
                                          : UnorderedCmpOpcode,
                                OperandVT.getSimpleVT())) {
      // isposinf(x) --> x == inf
      // isneginf(x) --> x == -inf
      // isposinf(x) || nan --> x u== inf
      // isneginf(x) || nan --> x u== -inf

      SDValue Inf = DAG.getConstantFP(
          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
          OperandVT);
      return DAG.getSetCC(DL, ResultVT, Op, Inf,
                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
      // TODO: Could handle ordered case, but it produces worse code for
      // x86. Maybe handle ordered if fabs is free?

      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;

      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                  OperandVT.getScalarType().getSimpleVT())) {
        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

        // TODO: Maybe only makes sense if fabs is free. Integer test of
        // exponent bits seems better for x86.
        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
                            IsOrdered ? OrderedOp : UnorderedOp);
      }
    }

    if (FPTestMask == fcNormal) {
      // TODO: Handle unordered
      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;

      if (isCondCodeLegalOrCustom(IsFiniteOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isCondCodeLegalOrCustom(IsNormalOp,
                                  OperandVT.getScalarType().getSimpleVT()) &&
          isFAbsFree(OperandVT)) {
        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
        SDValue Inf =
            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
        SDValue SmallestNormal = DAG.getConstantFP(
            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);

        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
        SDValue IsNormal =
            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
      }
    }
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;

  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
    Test = InvertedCheck;
    IsInverted = true;
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  SDValue Res;
  // OR a per-class partial result into the accumulated result (if any).
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily build (and cache) the f80 explicit-integer-bit test.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No class bit produced a test: the (possibly inverted) mask was empty.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
9071
9072// Only expand vector types if we have the appropriate vector bit operations.
9073staticboolcanExpandVectorCTPOP(constTargetLowering &TLI,EVT VT) {
9074assert(VT.isVector() &&"Expected vector type");
9075unsigned Len = VT.getScalarSizeInBits();
9076return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9077 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9078 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9079 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9080 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9081}
9082
/// Expand CTPOP with the parallel ("SWAR") bit-count algorithm when no native
/// population-count instruction is available. Returns an empty SDValue for
/// unsupported widths (only multiples of 8 up to 128 bits are handled) or
/// when a vector type lacks the required bit operations.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // Each byte now holds its own population count; for 8 bits we're done.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                       DAG.getNode(ISD::ADD, dl, VT, Op,
                                   DAG.getNode(ISD::SRL, dl, VT, Op,
                                               DAG.getConstant(8, dl, ShVT))),
                       DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all byte counts into the top byte; if MUL is not
  // available, emulate it with a log2(Len/8)-step shift-add chain.
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
9159
9160SDValueTargetLowering::expandVPCTPOP(SDNode *Node,SelectionDAG &DAG) const{
9161SDLoc dl(Node);
9162EVT VT = Node->getValueType(0);
9163EVT ShVT =getShiftAmountTy(VT, DAG.getDataLayout());
9164SDValueOp = Node->getOperand(0);
9165SDValue Mask = Node->getOperand(1);
9166SDValue VL = Node->getOperand(2);
9167unsigned Len = VT.getScalarSizeInBits();
9168assert(VT.isInteger() &&"VP_CTPOP not implemented for this type.");
9169
9170// TODO: Add support for irregular type lengths.
9171if (!(Len <= 128 && Len % 8 == 0))
9172returnSDValue();
9173
9174// This is same algorithm of expandCTPOP from
9175// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9176SDValue Mask55 =
9177 DAG.getConstant(APInt::getSplat(Len,APInt(8, 0x55)), dl, VT);
9178SDValue Mask33 =
9179 DAG.getConstant(APInt::getSplat(Len,APInt(8, 0x33)), dl, VT);
9180SDValue Mask0F =
9181 DAG.getConstant(APInt::getSplat(Len,APInt(8, 0x0F)), dl, VT);
9182
9183SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9184
9185// v = v - ((v >> 1) & 0x55555555...)
9186 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9187 DAG.getNode(ISD::VP_SRL, dl, VT,Op,
9188 DAG.getConstant(1, dl, ShVT), Mask, VL),
9189 Mask55, Mask, VL);
9190Op = DAG.getNode(ISD::VP_SUB, dl, VT,Op, Tmp1, Mask, VL);
9191
9192// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9193 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT,Op, Mask33, Mask, VL);
9194 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9195 DAG.getNode(ISD::VP_SRL, dl, VT,Op,
9196 DAG.getConstant(2, dl, ShVT), Mask, VL),
9197 Mask33, Mask, VL);
9198Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9199
9200// v = (v + (v >> 4)) & 0x0F0F0F0F...
9201 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(4, dl, ShVT),
9202 Mask, VL),
9203 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT,Op, Tmp4, Mask, VL);
9204Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9205
9206if (Len <= 8)
9207returnOp;
9208
9209// v = (v * 0x01010101...) >> (Len - 8)
9210SDValue V;
9211if (isOperationLegalOrCustomOrPromote(
9212 ISD::VP_MUL,getTypeToTransformTo(*DAG.getContext(), VT))) {
9213SDValue Mask01 =
9214 DAG.getConstant(APInt::getSplat(Len,APInt(8, 0x01)), dl, VT);
9215 V = DAG.getNode(ISD::VP_MUL, dl, VT,Op, Mask01, Mask, VL);
9216 }else {
9217 V =Op;
9218for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9219SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9220 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9221 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9222 Mask, VL);
9223 }
9224 }
9225return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9226 Mask, VL);
9227}
9228
/// Expand CTLZ/CTLZ_ZERO_UNDEF: prefer a legal CTLZ variant (adding a
/// zero-input select for the defined-at-zero form), otherwise smear the
/// leading set bit rightwards with an or-shift cascade and count via CTPOP of
/// the complement. Returns an empty SDValue if a vector type lacks the
/// required operations.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // ctlz(0) is defined as the bit width for the non-ZERO_UNDEF form.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  // After the smear, ~x has ones exactly in the original leading-zero
  // positions, so popcount(~x) is the leading-zero count.
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9278
/// Expand VP_CTLZ with the same or-shift smear + popcount-of-complement
/// technique as expandCTLZ, threading the node's mask and vector length
/// through each VP operation.
SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
                     DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
                     VL);
  }
  // ~x via XOR with all-ones (there is no VP NOT helper).
  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
                   Mask, VL);
  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
}
9305
/// Lower CTTZ/CTTZ_ZERO_UNDEF via a de Bruijn multiply and constant-pool
/// table lookup: isolate the lowest set bit with (x & -x), multiply by a de
/// Bruijn constant so the top log2(BitWidth) bits uniquely index a table of
/// bit positions, then load the answer from the table. Only 32- and 64-bit
/// widths are supported; for plain CTTZ a select maps a zero input to
/// BitWidth.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Lookup index = ((x & -x) * DeBruijn) >> (BitWidth - log2(BitWidth)).
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse mapping: for each bit position i, record i at the
  // table slot its de Bruijn product would index.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // ZERO_UNDEF form leaves the zero-input result undefined; no fixup needed.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9349
9350SDValueTargetLowering::expandCTTZ(SDNode *Node,SelectionDAG &DAG) const{
9351SDLoc dl(Node);
9352EVT VT = Node->getValueType(0);
9353SDValueOp = Node->getOperand(0);
9354unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9355
9356// If the non-ZERO_UNDEF version is supported we can use that instead.
9357if (Node->getOpcode() ==ISD::CTTZ_ZERO_UNDEF &&
9358isOperationLegalOrCustom(ISD::CTTZ, VT))
9359return DAG.getNode(ISD::CTTZ, dl, VT,Op);
9360
9361// If the ZERO_UNDEF version is supported use that and handle the zero case.
9362if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9363EVT SetCCVT =
9364getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9365SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT,Op);
9366SDValue Zero = DAG.getConstant(0, dl, VT);
9367SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT,Op, Zero,ISD::SETEQ);
9368return DAG.getSelect(dl, VT, SrcIsZero,
9369 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9370 }
9371
9372// Only expand vector types if we have the appropriate vector bit operations.
9373// This includes the operations needed to expand CTPOP if it isn't supported.
9374if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9375 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9376 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9377 !canExpandVectorCTPOP(*this, VT)) ||
9378 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9379 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9380 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9381returnSDValue();
9382
9383// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9384if (!VT.isVector() &&isOperationExpand(ISD::CTPOP, VT) &&
9385 !isOperationLegal(ISD::CTLZ, VT))
9386if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT,Op, NumBitsPerElt))
9387return V;
9388
9389// for now, we use: { return popcount(~x & (x - 1)); }
9390// unless the target has ctlz but not ctpop, in which case we use:
9391// { return 32 - nlz(~x & (x-1)); }
9392// Ref: "Hacker's Delight" by Henry Warren
9393SDValue Tmp = DAG.getNode(
9394ISD::AND, dl, VT, DAG.getNOT(dl,Op, VT),
9395 DAG.getNode(ISD::SUB, dl, VT,Op, DAG.getConstant(1, dl, VT)));
9396
9397// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9398if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9399return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9400 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9401 }
9402
9403return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9404}
9405
9406SDValueTargetLowering::expandVPCTTZ(SDNode *Node,SelectionDAG &DAG) const{
9407SDValueOp = Node->getOperand(0);
9408SDValue Mask = Node->getOperand(1);
9409SDValue VL = Node->getOperand(2);
9410SDLoc dl(Node);
9411EVT VT = Node->getValueType(0);
9412
9413// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9414SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT,Op,
9415 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9416SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT,Op,
9417 DAG.getConstant(1, dl, VT), Mask, VL);
9418SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9419return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9420}
9421
9422SDValueTargetLowering::expandVPCTTZElements(SDNode *N,
9423SelectionDAG &DAG) const{
9424// %cond = to_bool_vec %source
9425// %splat = splat /*val=*/VL
9426// %tz = step_vector
9427// %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9428// %r = vp.reduce.umin %v
9429SDLocDL(N);
9430SDValue Source =N->getOperand(0);
9431SDValue Mask =N->getOperand(1);
9432SDValue EVL =N->getOperand(2);
9433EVT SrcVT = Source.getValueType();
9434EVT ResVT =N->getValueType(0);
9435EVT ResVecVT =
9436EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9437
9438// Convert to boolean vector.
9439if (SrcVT.getScalarType() != MVT::i1) {
9440SDValue AllZero = DAG.getConstant(0,DL, SrcVT);
9441 SrcVT =EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9442 SrcVT.getVectorElementCount());
9443 Source = DAG.getNode(ISD::VP_SETCC,DL, SrcVT, Source, AllZero,
9444 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9445 }
9446
9447SDValue ExtEVL = DAG.getZExtOrTrunc(EVL,DL, ResVT);
9448SDValueSplat = DAG.getSplat(ResVecVT,DL, ExtEVL);
9449SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9450SDValueSelect =
9451 DAG.getNode(ISD::VP_SELECT,DL, ResVecVT, Source, StepVec,Splat, EVL);
9452return DAG.getNode(ISD::VP_REDUCE_UMIN,DL, ResVT, ExtEVL,Select, Mask, EVL);
9453}
9454
9455SDValueTargetLowering::expandVectorFindLastActive(SDNode *N,
9456SelectionDAG &DAG) const{
9457SDLocDL(N);
9458SDValue Mask =N->getOperand(0);
9459EVT MaskVT = Mask.getValueType();
9460EVT BoolVT = MaskVT.getScalarType();
9461
9462// Find a suitable type for a stepvector.
9463ConstantRange VScaleRange(1,/*isFullSet=*/true);// Fixed length default.
9464if (MaskVT.isScalableVector())
9465 VScaleRange =getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9466constTargetLowering &TLI = DAG.getTargetLoweringInfo();
9467unsigned EltWidth = TLI.getBitWidthForCttzElements(
9468 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9469/*ZeroIsPoison=*/true, &VScaleRange);
9470EVT StepVT =MVT::getIntegerVT(EltWidth);
9471EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9472
9473// If promotion is required to make the type legal, do it here; promotion
9474// of integers within LegalizeVectorOps is looking for types of the same
9475// size but with a smaller number of larger elements, not the usual larger
9476// size with the same number of larger elements.
9477if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9478TargetLowering::TypePromoteInteger) {
9479 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9480 StepVT = StepVecVT.getVectorElementType();
9481 }
9482
9483// Zero out lanes with inactive elements, then find the highest remaining
9484// value from the stepvector.
9485SDValue Zeroes = DAG.getConstant(0,DL, StepVecVT);
9486SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9487SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9488SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX,DL, StepVT, ActiveElts);
9489return DAG.getZExtOrTrunc(HighestIdx,DL,N->getValueType(0));
9490}
9491
9492SDValueTargetLowering::expandABS(SDNode *N,SelectionDAG &DAG,
9493bool IsNegative) const{
9494SDLoc dl(N);
9495EVT VT =N->getValueType(0);
9496SDValueOp =N->getOperand(0);
9497
9498// abs(x) -> smax(x,sub(0,x))
9499if (!IsNegative &&isOperationLegal(ISD::SUB, VT) &&
9500isOperationLegal(ISD::SMAX, VT)) {
9501SDValue Zero = DAG.getConstant(0, dl, VT);
9502Op = DAG.getFreeze(Op);
9503return DAG.getNode(ISD::SMAX, dl, VT,Op,
9504 DAG.getNode(ISD::SUB, dl, VT, Zero,Op));
9505 }
9506
9507// abs(x) -> umin(x,sub(0,x))
9508if (!IsNegative &&isOperationLegal(ISD::SUB, VT) &&
9509isOperationLegal(ISD::UMIN, VT)) {
9510SDValue Zero = DAG.getConstant(0, dl, VT);
9511Op = DAG.getFreeze(Op);
9512return DAG.getNode(ISD::UMIN, dl, VT,Op,
9513 DAG.getNode(ISD::SUB, dl, VT, Zero,Op));
9514 }
9515
9516// 0 - abs(x) -> smin(x, sub(0,x))
9517if (IsNegative &&isOperationLegal(ISD::SUB, VT) &&
9518isOperationLegal(ISD::SMIN, VT)) {
9519SDValue Zero = DAG.getConstant(0, dl, VT);
9520Op = DAG.getFreeze(Op);
9521return DAG.getNode(ISD::SMIN, dl, VT,Op,
9522 DAG.getNode(ISD::SUB, dl, VT, Zero,Op));
9523 }
9524
9525// Only expand vector types if we have the appropriate vector operations.
9526if (VT.isVector() &&
9527 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9528 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9529 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9530 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9531returnSDValue();
9532
9533Op = DAG.getFreeze(Op);
9534SDValue Shift = DAG.getNode(
9535ISD::SRA, dl, VT,Op,
9536 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9537SDValueXor = DAG.getNode(ISD::XOR, dl, VT,Op, Shift);
9538
9539// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9540if (!IsNegative)
9541return DAG.getNode(ISD::SUB, dl, VT,Xor, Shift);
9542
9543// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9544return DAG.getNode(ISD::SUB, dl, VT, Shift,Xor);
9545}
9546
9547SDValueTargetLowering::expandABD(SDNode *N,SelectionDAG &DAG) const{
9548SDLoc dl(N);
9549EVT VT =N->getValueType(0);
9550SDValueLHS = DAG.getFreeze(N->getOperand(0));
9551SDValueRHS = DAG.getFreeze(N->getOperand(1));
9552bool IsSigned =N->getOpcode() ==ISD::ABDS;
9553
9554// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9555// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9556unsigned MaxOpc = IsSigned ?ISD::SMAX :ISD::UMAX;
9557unsigned MinOpc = IsSigned ?ISD::SMIN :ISD::UMIN;
9558if (isOperationLegal(MaxOpc, VT) &&isOperationLegal(MinOpc, VT)) {
9559SDValue Max = DAG.getNode(MaxOpc, dl, VT,LHS,RHS);
9560SDValue Min = DAG.getNode(MinOpc, dl, VT,LHS,RHS);
9561return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9562 }
9563
9564// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9565if (!IsSigned &&isOperationLegal(ISD::USUBSAT, VT))
9566return DAG.getNode(ISD::OR, dl, VT,
9567 DAG.getNode(ISD::USUBSAT, dl, VT,LHS,RHS),
9568 DAG.getNode(ISD::USUBSAT, dl, VT,RHS,LHS));
9569
9570// If the subtract doesn't overflow then just use abs(sub())
9571// NOTE: don't use frozen operands for value tracking.
9572bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
9573 DAG.SignBitIsZero(N->getOperand(0));
9574
9575if (DAG.willNotOverflowSub(IsSigned || IsNonNegative,N->getOperand(0),
9576N->getOperand(1)))
9577return DAG.getNode(ISD::ABS, dl, VT,
9578 DAG.getNode(ISD::SUB, dl, VT,LHS,RHS));
9579
9580if (DAG.willNotOverflowSub(IsSigned || IsNonNegative,N->getOperand(1),
9581N->getOperand(0)))
9582return DAG.getNode(ISD::ABS, dl, VT,
9583 DAG.getNode(ISD::SUB, dl, VT,RHS,LHS));
9584
9585EVT CCVT =getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9586ISD::CondCodeCC = IsSigned ?ISD::CondCode::SETGT :ISD::CondCode::SETUGT;
9587SDValue Cmp = DAG.getSetCC(dl, CCVT,LHS,RHS,CC);
9588
9589// Branchless expansion iff cmp result is allbits:
9590// abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9591// abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9592if (CCVT == VT &&getBooleanContents(VT) ==ZeroOrNegativeOneBooleanContent) {
9593SDValue Diff = DAG.getNode(ISD::SUB, dl, VT,LHS,RHS);
9594SDValueXor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9595return DAG.getNode(ISD::SUB, dl, VT, Cmp,Xor);
9596 }
9597
9598// Similar to the branchless expansion, use the (sign-extended) usubo overflow
9599// flag if the (scalar) type is illegal as this is more likely to legalize
9600// cleanly:
9601// abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9602if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9603SDValue USubO =
9604 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9605SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9606SDValueXor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9607return DAG.getNode(ISD::SUB, dl, VT,Xor, Cmp);
9608 }
9609
9610// FIXME: Should really try to split the vector in case it's legal on a
9611// subvector.
9612if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9613return DAG.UnrollVectorOp(N);
9614
9615// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9616// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9617return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT,LHS,RHS),
9618 DAG.getNode(ISD::SUB, dl, VT,RHS,LHS));
9619}
9620
9621SDValueTargetLowering::expandAVG(SDNode *N,SelectionDAG &DAG) const{
9622SDLoc dl(N);
9623EVT VT =N->getValueType(0);
9624SDValueLHS =N->getOperand(0);
9625SDValueRHS =N->getOperand(1);
9626
9627unsigned Opc =N->getOpcode();
9628bool IsFloor = Opc ==ISD::AVGFLOORS || Opc ==ISD::AVGFLOORU;
9629bool IsSigned = Opc ==ISD::AVGCEILS || Opc ==ISD::AVGFLOORS;
9630unsigned SumOpc = IsFloor ?ISD::ADD :ISD::SUB;
9631unsigned SignOpc = IsFloor ?ISD::AND :ISD::OR;
9632unsigned ShiftOpc = IsSigned ?ISD::SRA :ISD::SRL;
9633unsigned ExtOpc = IsSigned ?ISD::SIGN_EXTEND :ISD::ZERO_EXTEND;
9634assert((Opc ==ISD::AVGFLOORS || Opc ==ISD::AVGCEILS ||
9635 Opc ==ISD::AVGFLOORU || Opc ==ISD::AVGCEILU) &&
9636"Unknown AVG node");
9637
9638// If the operands are already extended, we can add+shift.
9639bool IsExt =
9640 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9641 DAG.ComputeNumSignBits(RHS) >= 2) ||
9642 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9643 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9644if (IsExt) {
9645SDValue Sum = DAG.getNode(ISD::ADD, dl, VT,LHS,RHS);
9646if (!IsFloor)
9647 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9648return DAG.getNode(ShiftOpc, dl, VT, Sum,
9649 DAG.getShiftAmountConstant(1, VT, dl));
9650 }
9651
9652// For scalars, see if we can efficiently extend/truncate to use add+shift.
9653if (VT.isScalarInteger()) {
9654unsigned BW = VT.getScalarSizeInBits();
9655EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9656if (isTypeLegal(ExtVT) &&isTruncateFree(ExtVT, VT)) {
9657LHS = DAG.getNode(ExtOpc, dl, ExtVT,LHS);
9658RHS = DAG.getNode(ExtOpc, dl, ExtVT,RHS);
9659SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT,LHS,RHS);
9660if (!IsFloor)
9661 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9662 DAG.getConstant(1, dl, ExtVT));
9663// Just use SRL as we will be truncating away the extended sign bits.
9664 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9665 DAG.getShiftAmountConstant(1, ExtVT, dl));
9666return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9667 }
9668 }
9669
9670// avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9671if (Opc ==ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9672SDValue UAddWithOverflow =
9673 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS,LHS});
9674
9675SDValue Sum = UAddWithOverflow.getValue(0);
9676SDValue Overflow = UAddWithOverflow.getValue(1);
9677
9678// Right shift the sum by 1
9679SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9680 DAG.getShiftAmountConstant(1, VT, dl));
9681
9682SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9683SDValue OverflowShl = DAG.getNode(
9684ISD::SHL, dl, VT, ZeroExtOverflow,
9685 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9686
9687return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9688 }
9689
9690// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9691// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9692// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9693// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9694LHS = DAG.getFreeze(LHS);
9695RHS = DAG.getFreeze(RHS);
9696SDValue Sign = DAG.getNode(SignOpc, dl, VT,LHS,RHS);
9697SDValueXor = DAG.getNode(ISD::XOR, dl, VT,LHS,RHS);
9698SDValue Shift =
9699 DAG.getNode(ShiftOpc, dl, VT,Xor, DAG.getShiftAmountConstant(1, VT, dl));
9700return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9701}
9702
9703SDValueTargetLowering::expandBSWAP(SDNode *N,SelectionDAG &DAG) const{
9704SDLoc dl(N);
9705EVT VT =N->getValueType(0);
9706SDValueOp =N->getOperand(0);
9707
9708if (!VT.isSimple())
9709returnSDValue();
9710
9711EVT SHVT =getShiftAmountTy(VT, DAG.getDataLayout());
9712SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9713switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9714default:
9715returnSDValue();
9716case MVT::i16:
9717// Use a rotate by 8. This can be further expanded if necessary.
9718return DAG.getNode(ISD::ROTL, dl, VT,Op, DAG.getConstant(8, dl, SHVT));
9719case MVT::i32:
9720 Tmp4 = DAG.getNode(ISD::SHL, dl, VT,Op, DAG.getConstant(24, dl, SHVT));
9721 Tmp3 = DAG.getNode(ISD::AND, dl, VT,Op,
9722 DAG.getConstant(0xFF00, dl, VT));
9723 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9724 Tmp2 = DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(8, dl, SHVT));
9725 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9726 Tmp1 = DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(24, dl, SHVT));
9727 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9728 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9729return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9730case MVT::i64:
9731 Tmp8 = DAG.getNode(ISD::SHL, dl, VT,Op, DAG.getConstant(56, dl, SHVT));
9732 Tmp7 = DAG.getNode(ISD::AND, dl, VT,Op,
9733 DAG.getConstant(255ULL<<8, dl, VT));
9734 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9735 Tmp6 = DAG.getNode(ISD::AND, dl, VT,Op,
9736 DAG.getConstant(255ULL<<16, dl, VT));
9737 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9738 Tmp5 = DAG.getNode(ISD::AND, dl, VT,Op,
9739 DAG.getConstant(255ULL<<24, dl, VT));
9740 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9741 Tmp4 = DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(8, dl, SHVT));
9742 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9743 DAG.getConstant(255ULL<<24, dl, VT));
9744 Tmp3 = DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(24, dl, SHVT));
9745 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9746 DAG.getConstant(255ULL<<16, dl, VT));
9747 Tmp2 = DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(40, dl, SHVT));
9748 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9749 DAG.getConstant(255ULL<<8, dl, VT));
9750 Tmp1 = DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(56, dl, SHVT));
9751 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9752 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9753 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9754 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9755 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9756 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9757return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9758 }
9759}
9760
9761SDValueTargetLowering::expandVPBSWAP(SDNode *N,SelectionDAG &DAG) const{
9762SDLoc dl(N);
9763EVT VT =N->getValueType(0);
9764SDValueOp =N->getOperand(0);
9765SDValue Mask =N->getOperand(1);
9766SDValue EVL =N->getOperand(2);
9767
9768if (!VT.isSimple())
9769returnSDValue();
9770
9771EVT SHVT =getShiftAmountTy(VT, DAG.getDataLayout());
9772SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9773switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9774default:
9775returnSDValue();
9776case MVT::i16:
9777 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT,Op, DAG.getConstant(8, dl, SHVT),
9778 Mask, EVL);
9779 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(8, dl, SHVT),
9780 Mask, EVL);
9781return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9782case MVT::i32:
9783 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT,Op, DAG.getConstant(24, dl, SHVT),
9784 Mask, EVL);
9785 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,Op, DAG.getConstant(0xFF00, dl, VT),
9786 Mask, EVL);
9787 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9788 Mask, EVL);
9789 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(8, dl, SHVT),
9790 Mask, EVL);
9791 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9792 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9793 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(24, dl, SHVT),
9794 Mask, EVL);
9795 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9796 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9797return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9798case MVT::i64:
9799 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT,Op, DAG.getConstant(56, dl, SHVT),
9800 Mask, EVL);
9801 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT,Op,
9802 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9803 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9804 Mask, EVL);
9805 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT,Op,
9806 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9807 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9808 Mask, EVL);
9809 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT,Op,
9810 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9811 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9812 Mask, EVL);
9813 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(8, dl, SHVT),
9814 Mask, EVL);
9815 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9816 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9817 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(24, dl, SHVT),
9818 Mask, EVL);
9819 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9820 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9821 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(40, dl, SHVT),
9822 Mask, EVL);
9823 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9824 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9825 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT,Op, DAG.getConstant(56, dl, SHVT),
9826 Mask, EVL);
9827 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9828 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9829 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9830 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9831 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9832 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9833return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9834 }
9835}
9836
9837SDValueTargetLowering::expandBITREVERSE(SDNode *N,SelectionDAG &DAG) const{
9838SDLoc dl(N);
9839EVT VT =N->getValueType(0);
9840SDValueOp =N->getOperand(0);
9841EVT SHVT =getShiftAmountTy(VT, DAG.getDataLayout());
9842unsigned Sz = VT.getScalarSizeInBits();
9843
9844SDValue Tmp, Tmp2, Tmp3;
9845
9846// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9847// and finally the i1 pairs.
9848// TODO: We can easily support i4/i2 legal types if any target ever does.
9849if (Sz >= 8 &&isPowerOf2_32(Sz)) {
9850// Create the masks - repeating the pattern every byte.
9851APInt Mask4 =APInt::getSplat(Sz,APInt(8, 0x0F));
9852APInt Mask2 =APInt::getSplat(Sz,APInt(8, 0x33));
9853APInt Mask1 =APInt::getSplat(Sz,APInt(8, 0x55));
9854
9855// BSWAP if the type is wider than a single byte.
9856 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT,Op) :Op);
9857
9858// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9859 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9860 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9861 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9862 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9863 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9864
9865// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9866 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9867 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9868 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9869 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9870 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9871
9872// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9873 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9874 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9875 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9876 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9877 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9878return Tmp;
9879 }
9880
9881 Tmp = DAG.getConstant(0, dl, VT);
9882for (unsignedI = 0, J = Sz-1;I < Sz; ++I, --J) {
9883if (I < J)
9884 Tmp2 =
9885 DAG.getNode(ISD::SHL, dl, VT,Op, DAG.getConstant(J -I, dl, SHVT));
9886else
9887 Tmp2 =
9888 DAG.getNode(ISD::SRL, dl, VT,Op, DAG.getConstant(I - J, dl, SHVT));
9889
9890APInt Shift =APInt::getOneBitSet(Sz, J);
9891 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9892 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9893 }
9894
9895return Tmp;
9896}
9897
9898SDValueTargetLowering::expandVPBITREVERSE(SDNode *N,SelectionDAG &DAG) const{
9899assert(N->getOpcode() == ISD::VP_BITREVERSE);
9900
9901SDLoc dl(N);
9902EVT VT =N->getValueType(0);
9903SDValueOp =N->getOperand(0);
9904SDValue Mask =N->getOperand(1);
9905SDValue EVL =N->getOperand(2);
9906EVT SHVT =getShiftAmountTy(VT, DAG.getDataLayout());
9907unsigned Sz = VT.getScalarSizeInBits();
9908
9909SDValue Tmp, Tmp2, Tmp3;
9910
9911// If we can, perform BSWAP first and then the mask+swap the i4, then i2
9912// and finally the i1 pairs.
9913// TODO: We can easily support i4/i2 legal types if any target ever does.
9914if (Sz >= 8 &&isPowerOf2_32(Sz)) {
9915// Create the masks - repeating the pattern every byte.
9916APInt Mask4 =APInt::getSplat(Sz,APInt(8, 0x0F));
9917APInt Mask2 =APInt::getSplat(Sz,APInt(8, 0x33));
9918APInt Mask1 =APInt::getSplat(Sz,APInt(8, 0x55));
9919
9920// BSWAP if the type is wider than a single byte.
9921 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT,Op, Mask, EVL) :Op);
9922
9923// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9924 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9925 Mask, EVL);
9926 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9927 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9928 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9929 Mask, EVL);
9930 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9931 Mask, EVL);
9932 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9933
9934// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9935 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9936 Mask, EVL);
9937 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9938 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9939 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9940 Mask, EVL);
9941 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9942 Mask, EVL);
9943 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9944
9945// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9946 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9947 Mask, EVL);
9948 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9949 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9950 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9951 Mask, EVL);
9952 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9953 Mask, EVL);
9954 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9955return Tmp;
9956 }
9957returnSDValue();
9958}
9959
9960std::pair<SDValue, SDValue>
9961TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9962SelectionDAG &DAG) const{
9963SDLoc SL(LD);
9964SDValue Chain = LD->getChain();
9965SDValue BasePTR = LD->getBasePtr();
9966EVT SrcVT = LD->getMemoryVT();
9967EVT DstVT = LD->getValueType(0);
9968ISD::LoadExtType ExtType = LD->getExtensionType();
9969
9970if (SrcVT.isScalableVector())
9971report_fatal_error("Cannot scalarize scalable vector loads");
9972
9973unsigned NumElem = SrcVT.getVectorNumElements();
9974
9975EVT SrcEltVT = SrcVT.getScalarType();
9976EVT DstEltVT = DstVT.getScalarType();
9977
9978// A vector must always be stored in memory as-is, i.e. without any padding
9979// between the elements, since various code depend on it, e.g. in the
9980// handling of a bitcast of a vector type to int, which may be done with a
9981// vector store followed by an integer load. A vector that does not have
9982// elements that are byte-sized must therefore be stored as an integer
9983// built out of the extracted vector elements.
9984if (!SrcEltVT.isByteSized()) {
9985unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9986EVT LoadVT =EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9987
9988unsigned NumSrcBits = SrcVT.getSizeInBits();
9989EVT SrcIntVT =EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9990
9991unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9992SDValue SrcEltBitMask = DAG.getConstant(
9993APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9994
9995// Load the whole vector and avoid masking off the top bits as it makes
9996// the codegen worse.
9997SDValue Load =
9998 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9999 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
10000 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10001
10002SmallVector<SDValue, 8> Vals;
10003for (unsignedIdx = 0;Idx < NumElem; ++Idx) {
10004unsigned ShiftIntoIdx =
10005 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) -Idx :Idx);
10006SDValue ShiftAmount = DAG.getShiftAmountConstant(
10007 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10008SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10009SDValue Elt =
10010 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10011SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10012
10013if (ExtType !=ISD::NON_EXTLOAD) {
10014unsigned ExtendOp =ISD::getExtForLoadExtType(false, ExtType);
10015 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10016 }
10017
10018 Vals.push_back(Scalar);
10019 }
10020
10021SDValueValue = DAG.getBuildVector(DstVT, SL, Vals);
10022return std::make_pair(Value, Load.getValue(1));
10023 }
10024
10025unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10026assert(SrcEltVT.isByteSized());
10027
10028SmallVector<SDValue, 8> Vals;
10029SmallVector<SDValue, 8> LoadChains;
10030
10031for (unsignedIdx = 0;Idx < NumElem; ++Idx) {
10032SDValue ScalarLoad =
10033 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
10034 LD->getPointerInfo().getWithOffset(Idx * Stride),
10035 SrcEltVT, LD->getOriginalAlign(),
10036 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10037
10038 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR,TypeSize::getFixed(Stride));
10039
10040 Vals.push_back(ScalarLoad.getValue(0));
10041 LoadChains.push_back(ScalarLoad.getValue(1));
10042 }
10043
10044SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10045SDValueValue = DAG.getBuildVector(DstVT, SL, Vals);
10046
10047return std::make_pair(Value, NewChain);
10048}
10049
/// Lower a vector store by emitting one scalar store per element, or — when
/// the element type is not byte-sized — by packing all elements into a single
/// wide integer and storing that.  Returns the chain of the resulting
/// store(s).  Aborts on scalable vectors, which cannot be scalarized.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // Accumulator for the packed integer; each element is OR'ed in below.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // Extract element, truncate to the memory element width, then widen to
      // the packed integer type so it can be shifted into position.
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits,
      // so the shift position is mirrored.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // Store the packed integer with the original store's memory attributes.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // All element stores hang off the original chain; join them so the caller
  // gets a single output chain.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
10126
/// Expand a load that the target cannot perform at its natural alignment.
/// Returns {loaded value, output chain}.  Strategy depends on the type:
///  - FP/vector loads are done as a same-sized integer load plus bitcast when
///    that integer type is legal; otherwise the value is shuttled through an
///    aligned stack slot with register-width integer copies.
///  - Integer loads are split into two half-width loads (lo/hi per
///    endianness) and recombined with SHL+OR.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    // Integer type of the same bit width as the loaded memory type.
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        // Extending load: widen the bitcast result to the node's value type.
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended so the
  // final OR does not pick up stray high bits; the high half carries the
  // original extension kind.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    // Big-endian: the high half sits at the lower address.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}
10276
/// Expand a store that the target cannot perform at its natural alignment.
/// Returns the output chain.  Mirrors expandUnalignedLoad: FP/vector stores
/// become a bitcast plus misaligned integer store when that type is legal,
/// otherwise they go through an aligned stack slot; integer stores are split
/// into two half-width truncating stores.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.  Chaining on
      // Store keeps these loads ordered after the slot initialization.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; which half goes to the lower address depends on
  // endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10409
10410SDValue
10411TargetLowering::IncrementMemoryAddress(SDValueAddr,SDValue Mask,
10412constSDLoc &DL,EVT DataVT,
10413SelectionDAG &DAG,
10414bool IsCompressedMemory) const{
10415SDValue Increment;
10416EVT AddrVT =Addr.getValueType();
10417EVT MaskVT = Mask.getValueType();
10418assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10419"Incompatible types of Data and Mask");
10420if (IsCompressedMemory) {
10421if (DataVT.isScalableVector())
10422report_fatal_error(
10423"Cannot currently handle compressed memory with scalable vectors");
10424// Incrementing the pointer according to number of '1's in the mask.
10425EVT MaskIntVT =EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10426SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10427if (MaskIntVT.getSizeInBits() < 32) {
10428 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND,DL, MVT::i32, MaskInIntReg);
10429 MaskIntVT = MVT::i32;
10430 }
10431
10432// Count '1's with POPCNT.
10433 Increment = DAG.getNode(ISD::CTPOP,DL, MaskIntVT, MaskInIntReg);
10434 Increment = DAG.getZExtOrTrunc(Increment,DL, AddrVT);
10435// Scale is an element size in bytes.
10436SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8,DL,
10437 AddrVT);
10438 Increment = DAG.getNode(ISD::MUL,DL, AddrVT, Increment, Scale);
10439 }elseif (DataVT.isScalableVector()) {
10440 Increment = DAG.getVScale(DL, AddrVT,
10441APInt(AddrVT.getFixedSizeInBits(),
10442 DataVT.getStoreSize().getKnownMinValue()));
10443 }else
10444 Increment = DAG.getConstant(DataVT.getStoreSize(),DL, AddrVT);
10445
10446return DAG.getNode(ISD::ADD,DL, AddrVT,Addr, Increment);
10447}
10448
/// Clamp a (possibly dynamic) index so that inserting/extracting a sub-vector
/// of SubEC elements at that index stays within the bounds of a vector of
/// type VecVT.  Returns the clamped index value.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Runtime bound is vscale*NElts; clamp Idx to (bound - NumSubElts).
    // USUBSAT protects against underflow when NumSubElts may exceed NElts.
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    // Single element in a power-of-two vector: a mask is cheaper than UMIN.
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  // General fixed-width case: clamp to the last valid start position.
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
10482
10483SDValueTargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10484SDValue VecPtr,EVT VecVT,
10485SDValue Index) const{
10486return getVectorSubVecPointer(
10487 DAG, VecPtr, VecVT,
10488EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10489 Index);
10490}
10491
/// Return a pointer to the sub-vector of type SubVecVT inside the in-memory
/// vector at VecPtr, starting at the (clamped) element Index.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  // Keep the start index in bounds for the containing vector.
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  if (SubVecVT.isScalableVector())
    // Scalable sub-vector: byte offsets scale with vscale, so multiply the
    // index by vscale (vscale * 1) before applying the element size.
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10521
10522//===----------------------------------------------------------------------===//
10523// Implementation of Emulated TLS Model
10524//===----------------------------------------------------------------------===//
10525
/// Lower a thread-local global address under the emulated TLS model: the
/// address is obtained at runtime by calling
/// __emutls_get_address(&__emutls_v.<name>).  Returns the call's result.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look through casts/aliases so the control variable name matches the
  // underlying global.
  const GlobalValue *GV =
      cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  // The "__emutls_v.xyz" control variable must already exist in the module.
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10565
10566SDValueTargetLowering::lowerCmpEqZeroToCtlzSrl(SDValueOp,
10567SelectionDAG &DAG) const{
10568assert((Op->getOpcode() ==ISD::SETCC) &&"Input has to be a SETCC node.");
10569if (!isCtlzFast())
10570returnSDValue();
10571ISD::CondCodeCC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10572SDLoc dl(Op);
10573if (isNullConstant(Op.getOperand(1)) &&CC ==ISD::SETEQ) {
10574EVT VT =Op.getOperand(0).getValueType();
10575SDValue Zext =Op.getOperand(0);
10576if (VT.bitsLT(MVT::i32)) {
10577 VT = MVT::i32;
10578 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT,Op.getOperand(0));
10579 }
10580unsigned Log2b =Log2_32(VT.getSizeInBits());
10581SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10582SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10583 DAG.getConstant(Log2b, dl, MVT::i32));
10584return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10585 }
10586returnSDValue();
10587}
10588
/// Expand an integer SMIN/SMAX/UMIN/UMAX node into compare+select (or a
/// cheaper sub/add+usubsat form when legal), preferring condition codes for
/// which a SETCC node already exists in the DAG so it can be reused.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze Op0 since it is used twice (in the compare and the sub).
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    // First try the direct predicates (select picks Op0 on true)...
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    // ...then the commuted predicates (select picks Op1 on true).
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    // No reusable SETCC: fall back to the preferred predicate.
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10669
/// Expand a saturating add/sub node (SADDSAT/UADDSAT/SSUBSAT/USUBSAT) using
/// min/max shortcuts when legal, or an overflow op plus a select of the
/// saturation value.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to the matching overflow-reporting opcode.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  // Overflow op yields {result, overflow flag}.
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  // The arithmetic shift replicates the sign bit, and XOR with SIGNED_MIN
  // then produces SIGNED_MAX or SIGNED_MIN as the saturation value.
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10785
/// Expand a three-way comparison node (SCMP/UCMP) to -1/0/+1, either via two
/// selects or via a subtraction of the two comparison results, depending on
/// the target's boolean representation.
SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  EVT ResVT = Node->getValueType(0);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDLoc dl(Node);

  // UCMP uses unsigned predicates, SCMP signed.
  auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
  auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
  SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
  SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);

  // We can't perform arithmetic on i1 values. Extending them would
  // probably result in worse codegen, so let's just use two selects instead.
  // Some targets are also just better off using selects rather than subtraction
  // because one of the conditions can be merged with one of the selects.
  // And finally, if we don't know the contents of high bits of a boolean value
  // we can't perform any arithmetic either.
  if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
      getBooleanContents(BoolVT) == UndefinedBooleanContent) {
    SDValue SelectZeroOrOne =
        DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
                      DAG.getConstant(0, dl, ResVT));
    return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
                         SelectZeroOrOne);
  }

  // With all-ones booleans, IsGT - IsLT would have the wrong sign, so swap
  // the operands of the subtraction.
  if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
    std::swap(IsGT, IsLT);
  return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
                            ResVT);
}
10820
// Expand [US]SHLSAT (saturating shift-left) using a shift, a reverse shift
// to detect overflow, and a select of the saturation value.
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  bool IsSigned = Opcode == ISD::SSHLSAT;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert((Node->getOpcode() == ISD::SSHLSAT ||
          Node->getOpcode() == ISD::USHLSAT) &&
         "Expected a SHLSAT opcode");
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // The expansion below needs a vector select; scalarize if it isn't
  // available for this vector type.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.

  unsigned BW = VT.getScalarSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
  // Round-trip the shift (arithmetic for signed, logical for unsigned) to
  // recover the original value iff no bits were shifted out.
  SDValue Orig =
      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);

  SDValue SatVal;
  if (IsSigned) {
    // Saturate towards MIN for negative LHS, MAX for non-negative LHS.
    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
    SDValue Cond =
        DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
    SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
  } else {
    // Unsigned saturation is always all-ones.
    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
  }
  SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
  return DAG.getSelect(dl, VT, Cond, SatVal, Result);
}
10859
10860voidTargetLowering::forceExpandMultiply(SelectionDAG &DAG,constSDLoc &dl,
10861boolSigned,SDValue &Lo,SDValue &Hi,
10862SDValue LHS,SDValue RHS,
10863SDValue HiLHS,SDValue HiRHS) const{
10864EVT VT =LHS.getValueType();
10865assert(RHS.getValueType() == VT &&"Mismatching operand types");
10866
10867assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
10868assert((!Signed || !HiLHS) &&
10869"Signed flag should only be set when HiLHS and RiRHS are null");
10870
10871// We'll expand the multiplication by brute force because we have no other
10872// options. This is a trivially-generalized version of the code from
10873// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10874// 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
10875// sign bits while calculating the Hi half.
10876unsigned Bits = VT.getSizeInBits();
10877unsigned HalfBits = Bits / 2;
10878SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10879SDValue LL = DAG.getNode(ISD::AND, dl, VT,LHS, Mask);
10880SDValue RL = DAG.getNode(ISD::AND, dl, VT,RHS, Mask);
10881
10882SDValueT = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
10883SDValue TL = DAG.getNode(ISD::AND, dl, VT,T, Mask);
10884
10885SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10886// This is always an unsigned shift.
10887SDValue TH = DAG.getNode(ISD::SRL, dl, VT,T, Shift);
10888
10889unsigned ShiftOpc =Signed ?ISD::SRA :ISD::SRL;
10890SDValue LH = DAG.getNode(ShiftOpc, dl, VT,LHS, Shift);
10891SDValue RH = DAG.getNode(ShiftOpc, dl, VT,RHS, Shift);
10892
10893SDValue U =
10894 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
10895SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10896SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
10897
10898SDValue V =
10899 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
10900SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
10901
10902Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10903 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10904
10905Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
10906 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10907
10908// If HiLHS and HiRHS are set, multiply them by the opposite low part and add
10909// the products to Hi.
10910if (HiLHS) {
10911Hi = DAG.getNode(ISD::ADD, dl, VT,Hi,
10912 DAG.getNode(ISD::ADD, dl, VT,
10913 DAG.getNode(ISD::MUL, dl, VT, HiRHS,LHS),
10914 DAG.getNode(ISD::MUL, dl, VT,RHS, HiLHS)));
10915 }
10916}
10917
// Compute the Lo/Hi halves of a full (double-width) multiply of LHS * RHS,
// preferring a libcall on the 2x-wide integer type and falling back to an
// inline shift-and-add expansion when no suitable libcall exists.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, const SDValue LHS,
                                        const SDValue RHS, SDValue &Lo,
                                        SDValue &Hi) const {
  EVT VT = LHS.getValueType();
  assert(RHS.getValueType() == VT && "Mismatching operand types");
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  // No usable libcall for the wide type: expand inline.
  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
    return;
  }

  SDValue HiLHS, HiRHS;
  if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of low
    // part.
    unsigned LoSize = VT.getFixedSizeInBits();
    SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
    HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
    HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
  } else {
    // Unsigned inputs zero-extend into the high halves.
    HiLHS = DAG.getConstant(0, dl, VT);
    HiRHS = DAG.getConstant(0, dl, VT);
  }

  // Attempt a libcall.
  SDValue Ret;
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setIsSigned(Signed);
  CallOptions.setIsPostTypeLegalization(true);
  if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
    // Halves of WideVT are packed into registers in different order
    // depending on platform endianness. This is usually handled by
    // the C calling convention, but we can't defer to it in
    // the legalizer.
    SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
  } else {
    SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
  }
  assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
         "Ret value is a collection of constituent nodes holding result.");
  if (DAG.getDataLayout().isLittleEndian()) {
    // Same as above.
    Lo = Ret.getOperand(0);
    Hi = Ret.getOperand(1);
  } else {
    Lo = Ret.getOperand(1);
    Hi = Ret.getOperand(0);
  }
}
10982
// Expand [SU]MULFIX[SAT]: fixed-point multiply with a compile-time Scale,
// optionally saturating. Computes the double-width product (via *MUL_LOHI,
// MULH*, a wide MUL, or a forced expansion), funnel-shifts it right by Scale,
// and for the SAT variants clamps on overflow.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Scale-0 signed saturating multiply via SMULO.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Scale-0 unsigned saturating multiply via UMULO: clamp to max.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No way to form the wide product for this vector type.
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // Compare Hi against the sign-extension of Lo: any mismatch is overflow.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11139
// Expand [SU]DIVFIX[SAT]: fixed-point division with a compile-time Scale.
// Pre-shifts the operands (LHS up / RHS down) to absorb the scale when the
// type has enough headroom, then emits a regular division with
// round-towards-negative-infinity correction for the signed case. Returns an
// empty SDValue when the scale cannot be absorbed in this type.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; the remainder of the scale comes off the RHS.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    // Quotient is negative exactly when the operand signs differ.
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
11225
// Expand UADDO/USUBO into (a) a carry-op with zero carry-in when legal, or
// (b) an ADD/SUB plus a comparison that detects unsigned wrap-around.
void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  SDValue SetCC;
  if (IsAdd && isOneConstant(RHS)) {
    // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
    // the live range of X. We assume comparing with 0 is cheap.
    // The general case (X + C) < C is not necessarily beneficial. Although we
    // reduce the live range of X, we may introduce the materialization of
    // constant C.
    SetCC =
        DAG.getSetCC(dl, SetCCType, Result,
                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
  } else if (IsAdd && isAllOnesConstant(RHS)) {
    // Special case: uaddo X, -1 overflows if X != 0.
    SetCC =
        DAG.getSetCC(dl, SetCCType, LHS,
                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
  } else {
    // Generic check: add wrapped iff Result < LHS; sub wrapped iff
    // Result > LHS (unsigned comparisons).
    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
    SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  }
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
11271
// Expand SADDO/SSUBO into an ADD/SUB plus signed-overflow detection, either
// by comparing against the saturating form (when legal) or via the classic
// sign-based check.
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  // Overflow iff exactly one of the two conditions holds.
  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}
11311
// Expand SMULO/UMULO: compute the full product (low and high halves) and
// derive the overflow flag from the high half. Returns false only for vector
// types that would need the scalar fallback.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                               SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Ops[isSigned] = { high-half opcode, lohi opcode, extension opcode }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the wide type and split the product.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half differs from the sign-extension of
    // the bottom half.
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff the top half is nonzero.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11391
11392SDValueTargetLowering::expandVecReduce(SDNode *Node,SelectionDAG &DAG) const{
11393SDLoc dl(Node);
11394unsigned BaseOpcode =ISD::getVecReduceBaseOpcode(Node->getOpcode());
11395SDValueOp = Node->getOperand(0);
11396EVT VT =Op.getValueType();
11397
11398if (VT.isScalableVector())
11399report_fatal_error(
11400"Expanding reductions for scalable vectors is undefined.");
11401
11402// Try to use a shuffle reduction for power of two vectors.
11403if (VT.isPow2VectorType()) {
11404while (VT.getVectorNumElements() > 1) {
11405EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11406if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11407break;
11408
11409SDValueLo,Hi;
11410 std::tie(Lo,Hi) = DAG.SplitVector(Op, dl);
11411Op = DAG.getNode(BaseOpcode, dl, HalfVT,Lo,Hi, Node->getFlags());
11412 VT = HalfVT;
11413 }
11414 }
11415
11416EVT EltVT = VT.getVectorElementType();
11417unsigned NumElts = VT.getVectorNumElements();
11418
11419SmallVector<SDValue, 8> Ops;
11420 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11421
11422SDValue Res = Ops[0];
11423for (unsigned i = 1; i < NumElts; i++)
11424 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11425
11426// Result type may be wider than element type.
11427if (EltVT != Node->getValueType(0))
11428 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11429return Res;
11430}
11431
11432SDValueTargetLowering::expandVecReduceSeq(SDNode *Node,SelectionDAG &DAG) const{
11433SDLoc dl(Node);
11434SDValue AccOp = Node->getOperand(0);
11435SDValue VecOp = Node->getOperand(1);
11436SDNodeFlags Flags = Node->getFlags();
11437
11438EVT VT = VecOp.getValueType();
11439EVT EltVT = VT.getVectorElementType();
11440
11441if (VT.isScalableVector())
11442report_fatal_error(
11443"Expanding reductions for scalable vectors is undefined.");
11444
11445unsigned NumElts = VT.getVectorNumElements();
11446
11447SmallVector<SDValue, 8> Ops;
11448 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11449
11450unsigned BaseOpcode =ISD::getVecReduceBaseOpcode(Node->getOpcode());
11451
11452SDValue Res = AccOp;
11453for (unsigned i = 0; i < NumElts; i++)
11454 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11455
11456return Res;
11457}
11458
11459boolTargetLowering::expandREM(SDNode *Node,SDValue &Result,
11460SelectionDAG &DAG) const{
11461EVT VT = Node->getValueType(0);
11462SDLoc dl(Node);
11463boolisSigned = Node->getOpcode() ==ISD::SREM;
11464unsigned DivOpc =isSigned ?ISD::SDIV :ISD::UDIV;
11465unsigned DivRemOpc =isSigned ?ISD::SDIVREM :ISD::UDIVREM;
11466SDValue Dividend = Node->getOperand(0);
11467SDValue Divisor = Node->getOperand(1);
11468if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11469SDVTList VTs = DAG.getVTList(VT, VT);
11470 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11471returntrue;
11472 }
11473if (isOperationLegalOrCustom(DivOpc, VT)) {
11474// X % Y -> X-X/Y*Y
11475SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11476SDValueMul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11477 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend,Mul);
11478returntrue;
11479 }
11480returnfalse;
11481}
11482
// Expand FP_TO_[SU]INT_SAT: saturating float-to-int conversion. Clamps the
// source into the representable range (via FMINNUM/FMAXNUM when the bounds
// are exact and legal, otherwise via compare+select) and maps NaN to zero
// for the signed case.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  // Check whether the integer bounds convert exactly to this FP format; if
  // not, the min/max clamp below would be off by rounding error.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11592
11593SDValueTargetLowering::expandRoundInexactToOdd(EVT ResultVT,SDValueOp,
11594constSDLoc &dl,
11595SelectionDAG &DAG) const{
11596EVT OperandVT =Op.getValueType();
11597if (OperandVT.getScalarType() == ResultVT.getScalarType())
11598returnOp;
11599EVT ResultIntVT = ResultVT.changeTypeToInteger();
11600// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11601// can induce double-rounding which may alter the results. We can
11602// correct for this using a trick explained in: Boldo, Sylvie, and
11603// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11604// World Congress. 2005.
11605unsigned BitSize = OperandVT.getScalarSizeInBits();
11606EVT WideIntVT = OperandVT.changeTypeToInteger();
11607SDValue OpAsInt = DAG.getBitcast(WideIntVT,Op);
11608SDValue SignBit =
11609 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11610 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11611SDValue AbsWide;
11612if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11613 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT,Op);
11614 }else {
11615SDValue ClearedSign = DAG.getNode(
11616ISD::AND, dl, WideIntVT, OpAsInt,
11617 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11618 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11619 }
11620SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11621SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11622
11623// We can keep the narrow value as-is if narrowing was exact (no
11624// rounding error), the wide value was NaN (the narrow value is also
11625// NaN and should be preserved) or if we rounded to the odd value.
11626SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11627SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11628SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11629SDValueAnd = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11630EVT ResultIntVTCCVT =getSetCCResultType(
11631 DAG.getDataLayout(), *DAG.getContext(),And.getValueType());
11632SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11633// The result is already odd so we don't need to do anything.
11634SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT,And, Zero,ISD::SETNE);
11635
11636EVT WideSetCCVT =getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11637 AbsWide.getValueType());
11638// We keep results which are exact, odd or NaN.
11639SDValue KeepNarrow =
11640 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide,ISD::SETUEQ);
11641 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11642// We morally performed a round-down if AbsNarrow is smaller than
11643// AbsWide.
11644SDValue NarrowIsRd =
11645 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide,ISD::SETOGT);
11646// If the narrow value is odd or exact, pick it.
11647// Otherwise, narrow is even and corresponds to either the rounded-up
11648// or rounded-down value. If narrow is the rounded-down value, we want
11649// the rounded-up value as it will be odd.
11650SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11651SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11652Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11653int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11654SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11655 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11656 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11657Op = DAG.getNode(ISD::OR, dl, ResultIntVT,Op, SignBit);
11658return DAG.getNode(ISD::BITCAST, dl, ResultVT,Op);
11659}
11660
11661SDValueTargetLowering::expandFP_ROUND(SDNode *Node,SelectionDAG &DAG) const{
11662assert(Node->getOpcode() ==ISD::FP_ROUND &&"Unexpected opcode!");
11663SDValueOp = Node->getOperand(0);
11664EVT VT = Node->getValueType(0);
11665SDLoc dl(Node);
11666if (VT.getScalarType() == MVT::bf16) {
11667if (Node->getConstantOperandVal(1) == 1) {
11668return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11669 }
11670EVT OperandVT =Op.getValueType();
11671SDValue IsNaN = DAG.getSetCC(
11672 dl,
11673getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11674Op,Op,ISD::SETUO);
11675
11676// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11677// can induce double-rounding which may alter the results. We can
11678// correct for this using a trick explained in: Boldo, Sylvie, and
11679// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11680// World Congress. 2005.
11681EVTF32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11682EVT I32 =F32.changeTypeToInteger();
11683Op = expandRoundInexactToOdd(F32,Op, dl, DAG);
11684Op = DAG.getNode(ISD::BITCAST, dl, I32,Op);
11685
11686// Conversions should set NaN's quiet bit. This also prevents NaNs from
11687// turning into infinities.
11688SDValue NaN =
11689 DAG.getNode(ISD::OR, dl, I32,Op, DAG.getConstant(0x400000, dl, I32));
11690
11691// Factor in the contribution of the low 16 bits.
11692SDValue One = DAG.getConstant(1, dl, I32);
11693SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32,Op,
11694 DAG.getShiftAmountConstant(16, I32, dl));
11695 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11696SDValue RoundingBias =
11697 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11698SDValueAdd = DAG.getNode(ISD::ADD, dl, I32,Op, RoundingBias);
11699
11700// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11701// 0x80000000.
11702Op = DAG.getSelect(dl, I32, IsNaN, NaN,Add);
11703
11704// Now that we have rounded, shift the bits into position.
11705Op = DAG.getNode(ISD::SRL, dl, I32,Op,
11706 DAG.getShiftAmountConstant(16, I32, dl));
11707Op = DAG.getNode(ISD::BITCAST, dl, I32,Op);
11708EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11709Op = DAG.getNode(ISD::TRUNCATE, dl, I16,Op);
11710return DAG.getNode(ISD::BITCAST, dl, VT,Op);
11711 }
11712returnSDValue();
11713}
11714
11715SDValueTargetLowering::expandVectorSplice(SDNode *Node,
11716SelectionDAG &DAG) const{
11717assert(Node->getOpcode() ==ISD::VECTOR_SPLICE &&"Unexpected opcode!");
11718assert(Node->getValueType(0).isScalableVector() &&
11719"Fixed length vector types expected to use SHUFFLE_VECTOR!");
11720
11721EVT VT = Node->getValueType(0);
11722SDValue V1 = Node->getOperand(0);
11723SDValue V2 = Node->getOperand(1);
11724 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11725SDLocDL(Node);
11726
11727// Expand through memory thusly:
11728// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11729// Store V1, Ptr
11730// Store V2, Ptr + sizeof(V1)
11731// If (Imm < 0)
11732// TrailingElts = -Imm
11733// Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11734// else
11735// Ptr = Ptr + (Imm * sizeof(VT.Elt))
11736// Res = Load Ptr
11737
11738Align Alignment = DAG.getReducedAlign(VT,/*UseABI=*/false);
11739
11740EVT MemVT =EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11741 VT.getVectorElementCount() * 2);
11742SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11743EVT PtrVT = StackPtr.getValueType();
11744auto &MF = DAG.getMachineFunction();
11745auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11746auto PtrInfo =MachinePointerInfo::getFixedStack(MF, FrameIndex);
11747
11748// Store the lo part of CONCAT_VECTORS(V1, V2)
11749SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(),DL, V1, StackPtr, PtrInfo);
11750// Store the hi part of CONCAT_VECTORS(V1, V2)
11751SDValue OffsetToV2 = DAG.getVScale(
11752DL, PtrVT,
11753APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11754SDValue StackPtr2 = DAG.getNode(ISD::ADD,DL, PtrVT, StackPtr, OffsetToV2);
11755SDValue StoreV2 = DAG.getStore(StoreV1,DL, V2, StackPtr2, PtrInfo);
11756
11757if (Imm >= 0) {
11758// Load back the required element. getVectorElementPointer takes care of
11759// clamping the index if it's out-of-bounds.
11760 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11761// Load the spliced result
11762return DAG.getLoad(VT,DL, StoreV2, StackPtr,
11763MachinePointerInfo::getUnknownStack(MF));
11764 }
11765
11766uint64_t TrailingElts = -Imm;
11767
11768// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11769TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11770SDValue TrailingBytes =
11771 DAG.getConstant(TrailingElts * EltByteSize,DL, PtrVT);
11772
11773if (TrailingElts > VT.getVectorMinNumElements()) {
11774SDValue VLBytes =
11775 DAG.getVScale(DL, PtrVT,
11776APInt(PtrVT.getFixedSizeInBits(),
11777 VT.getStoreSize().getKnownMinValue()));
11778 TrailingBytes = DAG.getNode(ISD::UMIN,DL, PtrVT, TrailingBytes, VLBytes);
11779 }
11780
11781// Calculate the start address of the spliced result.
11782 StackPtr2 = DAG.getNode(ISD::SUB,DL, PtrVT, StackPtr2, TrailingBytes);
11783
11784// Load the spliced result
11785return DAG.getLoad(VT,DL, StoreV2, StackPtr2,
11786MachinePointerInfo::getUnknownStack(MF));
11787}
11788
11789SDValueTargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11790SelectionDAG &DAG) const{
11791SDLocDL(Node);
11792SDValue Vec = Node->getOperand(0);
11793SDValue Mask = Node->getOperand(1);
11794SDValue Passthru = Node->getOperand(2);
11795
11796EVT VecVT = Vec.getValueType();
11797EVT ScalarVT = VecVT.getScalarType();
11798EVT MaskVT = Mask.getValueType();
11799EVT MaskScalarVT = MaskVT.getScalarType();
11800
11801// Needs to be handled by targets that have scalable vector types.
11802if (VecVT.isScalableVector())
11803report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11804
11805SDValue StackPtr = DAG.CreateStackTemporary(
11806 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT,/*UseABI=*/false));
11807int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11808MachinePointerInfo PtrInfo =
11809MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11810
11811MVT PositionVT =getVectorIdxTy(DAG.getDataLayout());
11812SDValue Chain = DAG.getEntryNode();
11813SDValue OutPos = DAG.getConstant(0,DL, PositionVT);
11814
11815bool HasPassthru = !Passthru.isUndef();
11816
11817// If we have a passthru vector, store it on the stack, overwrite the matching
11818// positions and then re-write the last element that was potentially
11819// overwritten even though mask[i] = false.
11820if (HasPassthru)
11821 Chain = DAG.getStore(Chain,DL, Passthru, StackPtr, PtrInfo);
11822
11823SDValue LastWriteVal;
11824APInt PassthruSplatVal;
11825bool IsSplatPassthru =
11826ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11827
11828if (IsSplatPassthru) {
11829// As we do not know which position we wrote to last, we cannot simply
11830// access that index from the passthru vector. So we first check if passthru
11831// is a splat vector, to use any element ...
11832 LastWriteVal = DAG.getConstant(PassthruSplatVal,DL, ScalarVT);
11833 }elseif (HasPassthru) {
11834// ... if it is not a splat vector, we need to get the passthru value at
11835// position = popcount(mask) and re-load it from the stack before it is
11836// overwritten in the loop below.
11837EVT PopcountVT = ScalarVT.changeTypeToInteger();
11838SDValue Popcount = DAG.getNode(
11839ISD::TRUNCATE,DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11840 Popcount =
11841 DAG.getNode(ISD::ZERO_EXTEND,DL,
11842 MaskVT.changeVectorElementType(PopcountVT), Popcount);
11843 Popcount = DAG.getNode(ISD::VECREDUCE_ADD,DL, PopcountVT, Popcount);
11844SDValue LastElmtPtr =
11845 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11846 LastWriteVal = DAG.getLoad(
11847 ScalarVT,DL, Chain, LastElmtPtr,
11848MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11849 Chain = LastWriteVal.getValue(1);
11850 }
11851
11852unsigned NumElms = VecVT.getVectorNumElements();
11853for (unsignedI = 0;I < NumElms;I++) {
11854SDValueIdx = DAG.getVectorIdxConstant(I,DL);
11855
11856SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, ScalarVT, Vec,Idx);
11857SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11858 Chain = DAG.getStore(
11859 Chain,DL, ValI, OutPtr,
11860MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11861
11862// Get the mask value and add it to the current output position. This
11863// either increments by 1 if MaskI is true or adds 0 otherwise.
11864// Freeze in case we have poison/undef mask entries.
11865SDValue MaskI = DAG.getFreeze(
11866 DAG.getNode(ISD::EXTRACT_VECTOR_ELT,DL, MaskScalarVT, Mask,Idx));
11867 MaskI = DAG.getFreeze(MaskI);
11868 MaskI = DAG.getNode(ISD::TRUNCATE,DL, MVT::i1, MaskI);
11869 MaskI = DAG.getNode(ISD::ZERO_EXTEND,DL, PositionVT, MaskI);
11870 OutPos = DAG.getNode(ISD::ADD,DL, PositionVT, OutPos, MaskI);
11871
11872if (HasPassthru &&I == NumElms - 1) {
11873SDValue EndOfVector =
11874 DAG.getConstant(VecVT.getVectorNumElements() - 1,DL, PositionVT);
11875SDValue AllLanesSelected =
11876 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector,ISD::CondCode::SETUGT);
11877 OutPos = DAG.getNode(ISD::UMIN,DL, PositionVT, OutPos, EndOfVector);
11878 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11879
11880// Re-write the last ValI if all lanes were selected. Otherwise,
11881// overwrite the last write it with the passthru value.
11882 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11883 LastWriteVal,SDNodeFlags::Unpredictable);
11884 Chain = DAG.getStore(
11885 Chain,DL, LastWriteVal, OutPtr,
11886MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11887 }
11888 }
11889
11890return DAG.getLoad(VecVT,DL, Chain, StackPtr, PtrInfo);
11891}
11892
/// Legalize the condition code of a SETCC / VP_SETCC whose operands have type
/// OpVT, according to the target's condition-code legality table:
///  - If the code is Legal, nothing changes.
///  - Otherwise try swapping the operands, inverting the condition, or both,
///    until a legal code is found; \p NeedInvert reports to the caller that
///    the final result must be logically negated.
///  - i1 comparisons are expanded directly into AND/OR/XOR logic.
///  - Remaining FP conditions are split into two comparisons joined by
///    AND/OR (e.g. SETO -> SETOEQ(LHS,LHS) AND SETOEQ(RHS,RHS)).
/// \p LHS, \p RHS and \p CC are updated in place. Returns true if anything
/// was changed; when \p CC comes back null, \p LHS already holds the fully
/// expanded boolean value and \p RHS is cleared. \p Mask/\p EVL select the
/// VP (vector-predicated) flavor; \p Chain/\p IsSignaling thread strict-FP
/// state through the generated compares.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the swapped condition: (LHS cc RHS) == (RHS swapped(cc) LHS).
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // Use the inverted (and possibly swapped) condition; the caller must
      // negate the result, signalled through NeedInvert.
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y --> ~(X ^ Y)
        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
                         MVT::i1);
        break;
      case ISD::SETNE: // X != Y --> (X ^ Y)
        Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
        break;
      case ISD::SETGT:  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
      case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
      case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
      case ISD::SETGE:  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
      case ISD::SETLE:  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      }

      // Fully expanded: hand back the boolean in LHS, clear RHS/CC.
      LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    // Split the comparison into two comparisons (CC1, CC2) combined by Opc.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        // isnan(L) || isnan(R) --> L != L || R != R (unordered-compare form).
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      // ord(L, R) --> L == L && R == R.
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // Strip the ordered/unordered bit, keep the relation; 0x10 marks the
        // "ordered comparison" encoding block of ISD::CondCode.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // For strict-FP, merge the chains of the two compares.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // Fully expanded: result lives in LHS, clear RHS/CC.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
12085
12086SDValueTargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12087SelectionDAG &DAG) const{
12088EVT VT = Node->getValueType(0);
12089// Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12090// split into two equal parts.
12091if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12092returnSDValue();
12093
12094// Restrict expansion to cases where both parts can be concatenated.
12095auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12096if (LoVT != HiVT || !isTypeLegal(LoVT))
12097returnSDValue();
12098
12099SDLocDL(Node);
12100unsigned Opcode = Node->getOpcode();
12101
12102// Don't expand if the result is likely to be unrolled anyway.
12103if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12104returnSDValue();
12105
12106SmallVector<SDValue, 4> LoOps, HiOps;
12107for (constSDValue &V : Node->op_values()) {
12108auto [Lo,Hi] = DAG.SplitVector(V,DL, LoVT, HiVT);
12109 LoOps.push_back(Lo);
12110 HiOps.push_back(Hi);
12111 }
12112
12113SDValue SplitOpLo = DAG.getNode(Opcode,DL, LoVT, LoOps);
12114SDValue SplitOpHi = DAG.getNode(Opcode,DL, HiVT, HiOps);
12115return DAG.getNode(ISD::CONCAT_VECTORS,DL, VT, SplitOpLo, SplitOpHi);
12116}
MRI
unsigned const MachineRegisterInfo * MRI
Definition:AArch64AdvSIMDScalarPass.cpp:105
F32
static const LLT F32
Definition:AMDGPULegalizerInfo.cpp:286
Select
AMDGPU Register Bank Select
Definition:AMDGPURegBankSelect.cpp:71
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition:ARMSLSHardening.cpp:73
true
basic Basic Alias true
Definition:BasicAliasAnalysis.cpp:1981
Analysis
block Block Frequency Analysis
Definition:BlockFrequencyInfo.cpp:300
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
CallingConvLower.h
CodeGenCommonISel.h
DataLayout.h
RetTy
return RetTy
Definition:DeadArgumentElimination.cpp:361
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
DerivedTypes.h
DivisionByConstantInfo.h
Addr
uint64_t Addr
Definition:ELFObjHandler.cpp:79
Size
uint64_t Size
Definition:ELFObjHandler.cpp:81
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
isSigned
static bool isSigned(unsigned int Opcode)
Definition:ExpandLargeDivRem.cpp:52
GlobalVariable.h
ShrinkDemandedConstant
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
Definition:InstCombineSimplifyDemanded.cpp:42
KnownBits.h
LLVMContext.h
RegName
#define RegName(no)
Options
static LVOptions Options
Definition:LVOptions.cpp:25
info
lazy value info
Definition:LazyValueInfo.cpp:61
isNonZeroModBitWidthOrUndef
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
Definition:LegalizerHelper.cpp:7157
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition:Lint.cpp:557
MCAsmInfo.h
MCExpr.h
F
#define F(x, y, z)
Definition:MD5.cpp:55
I
#define I(x, y, z)
Definition:MD5.cpp:58
G
#define G(x, y, z)
Definition:MD5.cpp:56
MachineFrameInfo.h
MachineFunction.h
MachineJumpTableInfo.h
MachineRegisterInfo.h
isUndef
static bool isUndef(const MachineInstr &MI)
Definition:MachineSSAContext.cpp:57
TRI
unsigned const TargetRegisterInfo * TRI
Definition:MachineSink.cpp:2029
MathExtras.h
T1
#define T1
Definition:Mips16ISelLowering.cpp:340
Signed
@ Signed
Definition:NVPTXISelLowering.cpp:4789
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
P
#define P(N)
Passes
Function const char * Passes
Definition:PassBuilderBindings.cpp:51
Merge
R600 Clause Merge
Definition:R600ClauseMergePass.cpp:70
Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition:RISCVRedundantCopyElimination.cpp:75
CC
auto CC
Definition:RISCVRedundantCopyElimination.cpp:79
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.
SelectionDAG.h
Ptr
@ Ptr
Definition:TargetLibraryInfo.cpp:77
foldSetCCWithFunnelShift
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
Definition:TargetLowering.cpp:4437
lowerImmediateIfPossible
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
Definition:TargetLowering.cpp:6070
expandVPFunnelShift
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
Definition:TargetLowering.cpp:7977
getKnownUndefForVectorBinop
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
Definition:TargetLowering.cpp:3031
BuildExactUDIV
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
Definition:TargetLowering.cpp:6207
clampDynamicVectorIndex
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
Definition:TargetLowering.cpp:10449
getConstraintPiority
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
Definition:TargetLowering.cpp:5923
isFCmpEqualZero
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
Definition:TargetLowering.cpp:8703
turnVectorIntoSplatVector
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
Definition:TargetLowering.cpp:6700
canExpandVectorCTPOP
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
Definition:TargetLowering.cpp:9073
foldSetCCWithRotate
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
Definition:TargetLowering.cpp:4394
BuildExactSDIV
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
Definition:TargetLowering.cpp:6147
simplifySetCCWithCTPOP
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
Definition:TargetLowering.cpp:4320
combineShiftToAVG
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
Definition:TargetLowering.cpp:987
TargetLowering.h
This file describes how to lower LLVM code to machine code.
TargetRegisterInfo.h
UndefPoisonKind::PoisonOnly
@ PoisonOnly
ValueTracking.h
VectorUtils.h
Lookup
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
Definition:X86FloatingPoint.cpp:613
scalarizeVectorStore
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Definition:X86ISelLowering.cpp:25131
RHS
Value * RHS
Definition:X86PartialReduction.cpp:74
LHS
Value * LHS
Definition:X86PartialReduction.cpp:73
Node
Definition:ItaniumDemangle.h:163
Predicate
Definition:AMDGPURegBankLegalizeRules.cpp:332
T
llvm::APFloat
Definition:APFloat.h:904
llvm::APFloat::convertFromAPInt
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition:APFloat.h:1334
llvm::APFloat::getSmallestNormalized
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition:APFloat.h:1160
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition:APFloat.h:1351
llvm::APFloat::getLargest
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition:APFloat.h:1140
llvm::APFloat::getInf
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition:APFloat.h:1100
llvm::APFloat::getNaN
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition:APFloat.h:1111
llvm::APInt
Class for arbitrary precision integers.
Definition:APInt.h:78
llvm::APInt::udiv
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition:APInt.cpp:1547
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition:APInt.h:234
llvm::APInt::udivrem
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition:APInt.cpp:1732
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition:APInt.h:1407
llvm::APInt::isNegatedPowerOf2
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition:APInt.h:449
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition:APInt.cpp:986
llvm::APInt::getSignMask
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition:APInt.h:229
llvm::APInt::isMinSignedValue
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition:APInt.h:423
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition:APInt.h:1520
llvm::APInt::setHighBits
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition:APInt.h:1392
llvm::APInt::setBitsFrom
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition:APInt.h:1386
llvm::APInt::zextOrTrunc
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition:APInt.cpp:1007
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition:APInt.h:1492
llvm::APInt::trunc
APInt trunc(unsigned width) const
Truncate to new width.
Definition:APInt.cpp:910
llvm::APInt::getMaxValue
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition:APInt.h:206
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition:APInt.h:1330
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition:APInt.h:371
llvm::APInt::ugt
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition:APInt.h:1182
llvm::APInt::getBitsSet
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition:APInt.h:258
llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition:APInt.h:380
llvm::APInt::urem
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition:APInt.cpp:1640
llvm::APInt::setSignBit
void setSignBit()
Set the sign bit to 1.
Definition:APInt.h:1340
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition:APInt.h:1468
llvm::APInt::getSignedMaxValue
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition:APInt.h:209
llvm::APInt::getMinValue
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition:APInt.h:216
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition:APInt.h:329
llvm::APInt::intersects
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition:APInt.h:1249
llvm::APInt::clearAllBits
void clearAllBits()
Set every bit to 0.
Definition:APInt.h:1397
llvm::APInt::reverseBits
APInt reverseBits() const
Definition:APInt.cpp:741
llvm::APInt::ashrInPlace
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition:APInt.h:834
llvm::APInt::negate
void negate()
Negate this APInt in place.
Definition:APInt.h:1450
llvm::APInt::countr_zero
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition:APInt.h:1618
llvm::APInt::countl_zero
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition:APInt.h:1577
llvm::APInt::getSplat
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition:APInt.cpp:624
llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition:APInt.h:219
llvm::APInt::getSignificantBits
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition:APInt.h:1511
llvm::APInt::countLeadingZeros
unsigned countLeadingZeros() const
Definition:APInt.h:1585
llvm::APInt::isStrictlyPositive
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition:APInt.h:356
llvm::APInt::insertBits
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition:APInt.cpp:370
llvm::APInt::logBase2
unsigned logBase2() const
Definition:APInt.h:1739
llvm::APInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition:APInt.h:475
llvm::APInt::setAllBits
void setAllBits()
Set every bit to 1.
Definition:APInt.h:1319
llvm::APInt::multiplicativeInverse
APInt multiplicativeInverse() const
Definition:APInt.cpp:1248
llvm::APInt::isMaxSignedValue
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition:APInt.h:405
llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition:APInt.h:334
llvm::APInt::ule
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition:APInt.h:1150
llvm::APInt::sext
APInt sext(unsigned width) const
Sign extend to a new width.
Definition:APInt.cpp:959
llvm::APInt::setBits
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition:APInt.h:1367
llvm::APInt::shl
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition:APInt.h:873
llvm::APInt::byteSwap
APInt byteSwap() const
Definition:APInt.cpp:719
llvm::APInt::isSubsetOf
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition:APInt.h:1257
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition:APInt.h:440
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition:APInt.h:306
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition:APInt.h:296
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition:APInt.h:200
llvm::APInt::setLowBits
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition:APInt.h:1389
llvm::APInt::extractBits
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition:APInt.cpp:455
llvm::APInt::isOne
bool isOne() const
Determine if this is a value of 1.
Definition:APInt.h:389
llvm::APInt::getBitsSetFrom
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition:APInt.h:286
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition:APInt.h:239
llvm::APInt::clearHighBits
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition:APInt.h:1424
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition:APInt.h:1542
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition:APInt.h:858
llvm::APInt::lshr
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition:APInt.h:851
llvm::APInt::countr_one
unsigned countr_one() const
Count the number of trailing one bits.
Definition:APInt.h:1635
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition:APInt.h:1221
llvm::APInt::setBitVal
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition:APInt.h:1343
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition:ArrayRef.h:41
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition:ArrayRef.h:168
llvm::AttrBuilder
Definition:Attributes.h:1064
llvm::AttrBuilder::hasAttributes
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition:Attributes.h:1123
llvm::AttrBuilder::contains
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
Definition:Attributes.cpp:2326
llvm::AttrBuilder::removeAttribute
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
Definition:Attributes.cpp:2118
llvm::AttributeList
Definition:Attributes.h:490
llvm::AttributeList::hasFnAttr
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
Definition:Attributes.cpp:1877
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition:SelectionDAGNodes.h:2107
llvm::BuildVectorSDNode::getConstantSplatNode
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
Definition:SelectionDAG.cpp:13249
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition:CallingConvLower.h:33
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition:CallingConvLower.h:122
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition:CallingConvLower.h:128
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition:InstrTypes.h:1112
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition:Instructions.h:1479
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition:Constants.h:709
llvm::ConstantFPSDNode
Definition:SelectionDAGNodes.h:1739
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition:Constants.h:271
llvm::ConstantRange
This class represents a range of values.
Definition:ConstantRange.h:47
llvm::ConstantSDNode
Definition:SelectionDAGNodes.h:1684
llvm::ConstantSDNode::isOne
bool isOne() const
Definition:SelectionDAGNodes.h:1709
llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition:SelectionDAGNodes.h:1700
llvm::ConstantSDNode::isOpaque
bool isOpaque() const
Definition:SelectionDAGNodes.h:1715
llvm::ConstantSDNode::isZero
bool isZero() const
Definition:SelectionDAGNodes.h:1710
llvm::ConstantSDNode::isAllOnes
bool isAllOnes() const
Definition:SelectionDAGNodes.h:1711
llvm::Constant
This is an important base class in LLVM.
Definition:Constant.h:42
llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition:DWARFExpression.h:32
llvm::DWARFExpression::Operation::getNumOperands
uint64_t getNumOperands() const
Definition:DWARFExpression.h:90
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition:DataLayout.h:63
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition:DataLayout.h:197
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition:DataLayout.h:198
llvm::DataLayout::getPrefTypeAlign
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition:DataLayout.cpp:847
llvm::DemandedBits
Definition:DemandedBits.h:40
llvm::ElementCount
Definition:TypeSize.h:300
llvm::Function
Definition:Function.h:63
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition:Function.h:353
llvm::GISelKnownBits
Definition:GISelKnownBits.h:29
llvm::GlobalAddressSDNode
Definition:SelectionDAGNodes.h:1876
llvm::GlobalAddressSDNode::getOffset
int64_t getOffset() const
Definition:SelectionDAGNodes.h:1891
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition:SelectionDAGNodes.h:1890
llvm::GlobalValue
Definition:GlobalValue.h:48
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition:GlobalValue.h:657
llvm::GlobalVariable
Definition:GlobalVariable.h:39
llvm::InlineAsm
Definition:InlineAsm.h:34
llvm::InlineAsm::isLabel
@ isLabel
Definition:InlineAsm.h:99
llvm::InlineAsm::isInput
@ isInput
Definition:InlineAsm.h:96
llvm::InlineAsm::isOutput
@ isOutput
Definition:InlineAsm.h:97
llvm::InlineAsm::isClobber
@ isClobber
Definition:InlineAsm.h:98
llvm::InlineAsm::ConstraintCodeVector
std::vector< std::string > ConstraintCodeVector
Definition:InlineAsm.h:102
llvm::InstructionCost
Definition:InstructionCost.h:29
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition:Type.cpp:311
llvm::LLVMContext::emitError
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Definition:LLVMContext.cpp:210
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition:SelectionDAGNodes.h:2464
llvm::MCContext
Context object for machine code objects.
Definition:MCContext.h:83
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition:MCExpr.h:34
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition:MCRegister.h:33
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition:MCExpr.h:398
llvm::MVT
Machine Value Type.
Definition:MachineValueType.h:35
llvm::MVT::SimpleValueType
SimpleValueType
Definition:MachineValueType.h:37
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition:MachineValueType.h:55
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition:MachineValueType.h:90
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition:MachineValueType.h:308
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition:MachineValueType.h:80
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition:MachineValueType.h:441
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition:MachineValueType.h:259
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition:MachineFrameInfo.h:106
llvm::MachineFrameInfo::setAdjustsStack
void setAdjustsStack(bool V)
Definition:MachineFrameInfo.h:618
llvm::MachineFrameInfo::setHasCalls
void setHasCalls(bool V)
Definition:MachineFrameInfo.h:622
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition:MachineFrameInfo.h:486
llvm::MachineFunction
Definition:MachineFunction.h:267
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition:MachineFunction.h:749
llvm::MachineFunction::getDenormalMode
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
Definition:MachineFunction.cpp:324
llvm::MachineFunction::getJTISymbol
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Definition:MachineFunction.cpp:787
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition:MachineFunction.h:704
llvm::MachineJumpTableInfo::EK_GPRel32BlockAddress
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
Definition:MachineJumpTableInfo.h:63
llvm::MachineJumpTableInfo::EK_LabelDifference32
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
Definition:MachineJumpTableInfo.h:72
llvm::MachineJumpTableInfo::EK_BlockAddress
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Definition:MachineJumpTableInfo.h:53
llvm::MachineJumpTableInfo::EK_GPRel64BlockAddress
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
Definition:MachineJumpTableInfo.h:58
llvm::MachineMemOperand::MONone
@ MONone
Definition:MachineMemOperand.h:134
llvm::MachineOperand::clobbersPhysReg
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
Definition:MachineOperand.h:646
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition:MachineRegisterInfo.h:51
llvm::Module::getNamedGlobal
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition:Module.h:462
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition:ArrayRef.h:310
llvm::MutableArrayRef::end
iterator end() const
Definition:ArrayRef.h:360
llvm::MutableArrayRef::begin
iterator begin() const
Definition:ArrayRef.h:359
llvm::PointerType
Class to represent pointers.
Definition:DerivedTypes.h:670
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
llvm::Register
Wrapper class representing virtual and physical registers.
Definition:Register.h:19
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition:SelectionDAGNodes.h:1182
llvm::SDNode
Represents one node in the SelectionDAG.
Definition:SelectionDAGNodes.h:496
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition:SelectionDAGNodes.h:687
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition:SelectionDAGNodes.h:739
llvm::SDNode::getFlags
SDNodeFlags getFlags() const
Definition:SelectionDAGNodes.h:1043
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition:SelectionDAGNodes.h:992
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition:SelectionDAGNodes.h:1062
llvm::SDNode::setFlags
void setFlags(SDNodeFlags NewFlags)
Definition:SelectionDAGNodes.h:1044
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition:SelectionDAGNodes.h:145
llvm::SDValue::isUndef
bool isUndef() const
Definition:SelectionDAGNodes.h:1249
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition:SelectionDAGNodes.h:159
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition:SelectionDAGNodes.h:1257
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition:SelectionDAGNodes.h:179
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition:SelectionDAGNodes.h:1217
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition:SelectionDAGNodes.h:199
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition:SelectionDAGNodes.h:1225
llvm::SDValue::use_empty
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
Definition:SelectionDAGNodes.h:1253
llvm::SDValue::getConstantOperandAPInt
const APInt & getConstantOperandAPInt(unsigned i) const
Definition:SelectionDAGNodes.h:1233
llvm::SDValue::getScalarValueSizeInBits
uint64_t getScalarValueSizeInBits() const
Definition:SelectionDAGNodes.h:203
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition:SelectionDAGNodes.h:1229
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition:SelectionDAGNodes.h:190
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition:SelectionDAGNodes.h:1213
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition:SelectionDAG.h:228
llvm::SelectionDAG::willNotOverflowAdd
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Definition:SelectionDAG.h:2000
llvm::SelectionDAG::getReducedAlign
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
Definition:SelectionDAG.cpp:2743
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition:SelectionDAG.cpp:9287
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition:SelectionDAG.h:751
llvm::SelectionDAG::getExtOrTrunc
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition:SelectionDAG.h:983
llvm::SelectionDAG::ComputeMaxSignificantBits
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
Definition:SelectionDAG.cpp:5417
llvm::SelectionDAG::isKnownNeverSNaN
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
Definition:SelectionDAG.h:2153
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition:SelectionDAG.cpp:10708
llvm::SelectionDAG::getShiftAmountConstant
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
Definition:SelectionDAG.cpp:1811
llvm::SelectionDAG::FoldSetCC
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
Definition:SelectionDAG.cpp:2813
llvm::SelectionDAG::getAllOnesConstant
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition:SelectionDAG.cpp:1800
llvm::SelectionDAG::ExtractVectorElements
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
Definition:SelectionDAG.cpp:13053
llvm::SelectionDAG::getVScale
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
Definition:SelectionDAG.cpp:2092
llvm::SelectionDAG::getFreeze
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
Definition:SelectionDAG.cpp:2462
llvm::SelectionDAG::getConstantPool
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
Definition:SelectionDAG.cpp:1968
llvm::SelectionDAG::isConstantIntBuildVectorOrConstantInt
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
Definition:SelectionDAG.cpp:13439
llvm::SelectionDAG::getJumpTableDebugInfo
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
Definition:SelectionDAG.cpp:1961
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition:SelectionDAG.h:1251
llvm::SelectionDAG::UnrollVectorOp
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
Definition:SelectionDAG.cpp:12720
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition:SelectionDAG.cpp:1873
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition:SelectionDAG.cpp:9270
llvm::SelectionDAG::getGLOBAL_OFFSET_TABLE
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
Definition:SelectionDAG.h:1141
llvm::SelectionDAG::getStepVector
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
Definition:SelectionDAG.cpp:2125
llvm::SelectionDAG::willNotOverflowSub
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
Definition:SelectionDAG.h:2018
llvm::SelectionDAG::shouldOptForSize
bool shouldOptForSize() const
Definition:SelectionDAG.cpp:1401
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition:SelectionDAG.cpp:1622
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition:SelectionDAG.h:503
llvm::SelectionDAG::MaxRecursionDepth
static constexpr unsigned MaxRecursionDepth
Definition:SelectionDAG.h:458
llvm::SelectionDAG::GetSplitDestVTs
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
Definition:SelectionDAG.cpp:12961
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition:SelectionDAG.h:1129
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition:SelectionDAG.h:857
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition:SelectionDAG.cpp:2433
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition:SelectionDAG.h:1280
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition:SelectionDAG.cpp:1568
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition:SelectionDAG.h:497
llvm::SelectionDAG::doesNodeExist
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
Definition:SelectionDAG.cpp:11311
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition:SelectionDAG.cpp:1666
llvm::SelectionDAG::getMemBasePlusOffset
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
Definition:SelectionDAG.cpp:8052
llvm::SelectionDAG::getGlobalAddress
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
Definition:SelectionDAG.cpp:1891
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition:SelectionDAG.cpp:9371
llvm::SelectionDAG::SplitVector
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
Definition:SelectionDAG.cpp:13006
llvm::SelectionDAG::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
Definition:SelectionDAG.cpp:5430
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition:SelectionDAG.cpp:9320
llvm::SelectionDAG::getSignedConstant
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Definition:SelectionDAG.cpp:1794
llvm::SelectionDAG::getSplatVector
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition:SelectionDAG.h:891
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition:SelectionDAG.cpp:2969
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition:SelectionDAG.cpp:1084
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition:SelectionDAG.cpp:1502
llvm::SelectionDAG::isKnownToBeAPowerOfTwo
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
Definition:SelectionDAG.cpp:4645
llvm::SelectionDAG::isKnownNeverZero
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
Definition:SelectionDAG.cpp:5814
llvm::SelectionDAG::FoldConstantArithmetic
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
Definition:SelectionDAG.cpp:6672
llvm::SelectionDAG::getBoolExtOrTrunc
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
Definition:SelectionDAG.cpp:1559
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition:SelectionDAG.cpp:2052
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition:SelectionDAG.h:498
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition:SelectionDAG.h:1290
llvm::SelectionDAG::isKnownNeverZeroFloat
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
Definition:SelectionDAG.cpp:5805
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition:SelectionDAG.cpp:2038
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition:SelectionDAG.cpp:10327
llvm::SelectionDAG::getFPExtendOrRound
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
Definition:SelectionDAG.cpp:1475
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
Definition:SelectionDAG.cpp:5672
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition:SelectionDAG.h:701
llvm::SelectionDAG::ComputeNumSignBits
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition:SelectionDAG.cpp:4739
llvm::SelectionDAG::getBoolConstant
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
Definition:SelectionDAG.cpp:1651
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition:SelectionDAG.h:797
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition:SelectionDAG.cpp:1824
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition:SelectionDAG.h:492
llvm::SelectionDAG::getValidMaximumShiftAmount
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
Definition:SelectionDAG.cpp:3392
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition:SelectionDAG.cpp:3415
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition:SelectionDAG.cpp:1508
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition:SelectionDAG.cpp:2079
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition:SelectionDAG.cpp:2977
llvm::SelectionDAG::getValidShiftAmount
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
Definition:SelectionDAG.cpp:3349
llvm::SelectionDAG::getObjectPtrOffset
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition:SelectionDAG.h:1081
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition:SelectionDAG.h:510
llvm::SelectionDAG::getSetCCVP
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
Definition:SelectionDAG.h:1268
llvm::SelectionDAG::CreateStackTemporary
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition:SelectionDAG.cpp:2776
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition:SelectionDAG.h:580
llvm::SelectionDAG::getSplat
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition:SelectionDAG.h:907
llvm::SelectionDAG::SplitScalar
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
Definition:SelectionDAG.cpp:12946
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition:SelectionDAG.cpp:2147
llvm::ShuffleVectorSDNode::commuteMask
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
Definition:SelectionDAGNodes.h:1666
llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition:SmallString.h:26
llvm::SmallVectorBase::size
size_t size() const
Definition:SmallVector.h:78
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition:SmallVector.h:573
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition:SmallVector.h:683
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::SrcOp
Definition:MachineIRBuilder.h:142
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition:SelectionDAGNodes.h:2492
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::substr
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition:StringRef.h:571
llvm::StringRef::starts_with
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition:StringRef.h:265
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition:StringRef.h:150
llvm::StringRef::data
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition:StringRef.h:144
llvm::StringRef::end
iterator end() const
Definition:StringRef.h:118
llvm::StructType
Class to represent struct types.
Definition:DerivedTypes.h:218
llvm::TargetLoweringBase::ArgListEntry
Definition:TargetLowering.h:297
llvm::TargetLoweringBase::ArgListEntry::IsNoExt
bool IsNoExt
Definition:TargetLowering.h:304
llvm::TargetLoweringBase::ArgListEntry::setAttributes
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
Definition:TargetLowering.cpp:112
llvm::TargetLoweringBase::ArgListEntry::IsSwiftAsync
bool IsSwiftAsync
Definition:TargetLowering.h:314
llvm::TargetLoweringBase::ArgListEntry::IsSRet
bool IsSRet
Definition:TargetLowering.h:306
llvm::TargetLoweringBase::ArgListEntry::IsInAlloca
bool IsInAlloca
Definition:TargetLowering.h:310
llvm::TargetLoweringBase::ArgListEntry::IsNest
bool IsNest
Definition:TargetLowering.h:307
llvm::TargetLoweringBase::ArgListEntry::IndirectType
Type * IndirectType
Definition:TargetLowering.h:318
llvm::TargetLoweringBase::ArgListEntry::IsSExt
bool IsSExt
Definition:TargetLowering.h:302
llvm::TargetLoweringBase::ArgListEntry::Alignment
MaybeAlign Alignment
Definition:TargetLowering.h:317
llvm::TargetLoweringBase::ArgListEntry::IsReturned
bool IsReturned
Definition:TargetLowering.h:312
llvm::TargetLoweringBase::ArgListEntry::IsZExt
bool IsZExt
Definition:TargetLowering.h:303
llvm::TargetLoweringBase::ArgListEntry::IsPreallocated
bool IsPreallocated
Definition:TargetLowering.h:311
llvm::TargetLoweringBase::ArgListEntry::IsSwiftSelf
bool IsSwiftSelf
Definition:TargetLowering.h:313
llvm::TargetLoweringBase::ArgListEntry::IsInReg
bool IsInReg
Definition:TargetLowering.h:305
llvm::TargetLoweringBase::ArgListEntry::IsByVal
bool IsByVal
Definition:TargetLowering.h:308
llvm::TargetLoweringBase::ArgListEntry::IsSwiftError
bool IsSwiftError
Definition:TargetLowering.h:315
llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition:TargetLowering.h:195
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition:TargetLowering.h:1442
llvm::TargetLoweringBase::isShuffleMaskLegal
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
Definition:TargetLowering.h:1244
llvm::TargetLoweringBase::shouldRemoveRedundantExtend
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
Definition:TargetLowering.h:1827
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition:TargetLowering.h:1677
llvm::TargetLoweringBase::Expand
@ Expand
Definition:TargetLowering.h:202
llvm::TargetLoweringBase::Legal
@ Legal
Definition:TargetLowering.h:200
llvm::TargetLoweringBase::getLibcallCallingConv
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
Definition:TargetLowering.h:3466
llvm::TargetLoweringBase::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition:TargetLowering.h:2854
llvm::TargetLoweringBase::isSExtCheaperThanZExt
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
Definition:TargetLowering.h:3085
llvm::TargetLoweringBase::getVectorIdxTy
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
Definition:TargetLowering.h:421
llvm::TargetLoweringBase::isSafeMemOpType
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
Definition:TargetLowering.h:1999
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition:TargetLowering.h:364
llvm::TargetLoweringBase::isCtpopFast
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
Definition:TargetLowering.h:722
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition:TargetLowering.h:3066
llvm::TargetLoweringBase::isPaddedAtMostSignificantBitsWhenStored
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
Definition:TargetLowering.h:1832
llvm::TargetLoweringBase::TypePromoteInteger
@ TypePromoteInteger
Definition:TargetLowering.h:211
llvm::TargetLoweringBase::getOptimalMemOpType
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
Definition:TargetLowering.h:1980
llvm::TargetLoweringBase::getCondCodeAction
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
Definition:TargetLowering.h:1616
llvm::TargetLoweringBase::isCommutativeBinOp
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
Definition:TargetLowering.h:2897
llvm::TargetLoweringBase::isFPImmLegal
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
Definition:TargetLowering.h:1235
llvm::TargetLoweringBase::getCmpLibcallReturnType
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
Definition:TargetLoweringBase.cpp:1529
llvm::TargetLoweringBase::getBitWidthForCttzElements
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
Definition:TargetLoweringBase.cpp:923
llvm::TargetLoweringBase::shouldTransformSignedTruncationCheck
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
Definition:TargetLowering.h:842
llvm::TargetLoweringBase::isLegalRC
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
Definition:TargetLoweringBase.cpp:1144
llvm::TargetLoweringBase::shouldExpandCmpUsingSelects
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
Definition:TargetLowering.h:3408
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition:TargetLowering.h:1918
llvm::TargetLoweringBase::isOperationCustom
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Definition:TargetLowering.h:1380
llvm::TargetLoweringBase::getShiftAmountTy
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
Definition:TargetLoweringBase.cpp:890
llvm::TargetLoweringBase::shouldExtendTypeInLibCall
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
Definition:TargetLowering.h:2303
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition:TargetLowering.h:2972
llvm::TargetLoweringBase::shouldAvoidTransformToShift
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
Definition:TargetLowering.h:3384
llvm::TargetLoweringBase::isFPExtFree
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
Definition:TargetLowering.h:3186
llvm::TargetLoweringBase::getSetCCResultType
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
Definition:TargetLoweringBase.cpp:1523
llvm::TargetLoweringBase::getTypeToTransformTo
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition:TargetLowering.h:1156
llvm::TargetLoweringBase::getBooleanContents
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
Definition:TargetLowering.h:1004
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
Definition:TargetLowering.h:1630
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition:TargetLowering.h:1093
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition:TargetLowering.h:371
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition:TargetLowering.h:1447
llvm::TargetLoweringBase::shouldReduceLoadWidth
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
Definition:TargetLowering.h:1815
llvm::TargetLoweringBase::getCustomCtpopCost
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
Definition:TargetLowering.h:728
llvm::TargetLoweringBase::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Definition:TargetLowering.h:855
llvm::TargetLoweringBase::BooleanContent
BooleanContent
Enum that describes how the target represents true/false values.
Definition:TargetLowering.h:234
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition:TargetLowering.h:236
llvm::TargetLoweringBase::UndefinedBooleanContent
@ UndefinedBooleanContent
Definition:TargetLowering.h:235
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition:TargetLowering.h:237
llvm::TargetLoweringBase::isIntDivCheap
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
Definition:TargetLowering.h:559
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition:TargetLowering.h:1339
llvm::TargetLoweringBase::allowsMemoryAccess
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
Definition:TargetLoweringBase.cpp:1735
llvm::TargetLoweringBase::hasAndNotCompare
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
Definition:TargetLowering.h:797
llvm::TargetLoweringBase::isNarrowingProfitable
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
Definition:TargetLowering.h:3305
llvm::TargetLoweringBase::isBinOp
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
Definition:TargetLowering.h:2941
llvm::TargetLoweringBase::isCtlzFast
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
Definition:TargetLowering.h:717
llvm::TargetLoweringBase::shouldUseStrictFP_TO_INT
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
Definition:TargetLowering.h:2455
llvm::TargetLoweringBase::NegatibleCost
NegatibleCost
Enum that specifies when a float negation is beneficial.
Definition:TargetLowering.h:282
llvm::TargetLoweringBase::NegatibleCost::Cheaper
@ Cheaper
llvm::TargetLoweringBase::NegatibleCost::Expensive
@ Expensive
llvm::TargetLoweringBase::NegatibleCost::Neutral
@ Neutral
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition:TargetLowering.h:1143
llvm::TargetLoweringBase::getCmpLibcallCC
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
Definition:TargetLowering.h:3455
llvm::TargetLoweringBase::shouldSignExtendTypeInLibCall
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
Definition:TargetLowering.h:2298
llvm::TargetLoweringBase::getLibcallName
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
Definition:TargetLowering.h:3440
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition:TargetLowering.h:329
llvm::TargetLoweringBase::getAsmOperandValueType
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Definition:TargetLowering.h:1668
llvm::TargetLoweringBase::isCondCodeLegalOrCustom
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
Definition:TargetLowering.h:1636
llvm::TargetLoweringBase::getRegisterType
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
Definition:TargetLowering.h:1728
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition:TargetLowering.h:3223
llvm::TargetLoweringBase::getOperationAction
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
Definition:TargetLowering.h:1270
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition:TargetLowering.h:1367
llvm::TargetLoweringBase::MulExpansionKind
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
Definition:TargetLowering.h:275
llvm::TargetLoweringBase::MulExpansionKind::Always
@ Always
llvm::TargetLoweringBase::getExtendForContent
static ISD::NodeType getExtendForContent(BooleanContent Content)
Definition:TargetLowering.h:334
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition:TargetLowering.h:3780
llvm::TargetLowering::expandAddSubSat
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
Definition:TargetLowering.cpp:10670
llvm::TargetLowering::buildSDIVPow2WithCMov
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
Definition:TargetLowering.cpp:6292
llvm::TargetLowering::getMultipleConstraintMatchWeight
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
Definition:TargetLowering.cpp:5945
llvm::TargetLowering::expandVPCTLZ
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
Definition:TargetLowering.cpp:9279
llvm::TargetLowering::expandMULO
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
Definition:TargetLowering.cpp:11312
llvm::TargetLowering::expandMUL
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
Definition:TargetLowering.cpp:7778
llvm::TargetLowering::ConstraintType
ConstraintType
Definition:TargetLowering.h:4950
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition:TargetLowering.h:4952
llvm::TargetLowering::C_Memory
@ C_Memory
Definition:TargetLowering.h:4953
llvm::TargetLowering::C_Immediate
@ C_Immediate
Definition:TargetLowering.h:4955
llvm::TargetLowering::C_Register
@ C_Register
Definition:TargetLowering.h:4951
llvm::TargetLowering::C_Other
@ C_Other
Definition:TargetLowering.h:4956
llvm::TargetLowering::C_Address
@ C_Address
Definition:TargetLowering.h:4954
llvm::TargetLowering::C_Unknown
@ C_Unknown
Definition:TargetLowering.h:4957
llvm::TargetLowering::getPICJumpTableRelocBaseExpr
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition:TargetLowering.cpp:472
llvm::TargetLowering::SimplifyDemandedVectorElts
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
Definition:TargetLowering.cpp:3077
llvm::TargetLowering::isUsedByReturnOnly
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
Definition:TargetLowering.h:4807
llvm::TargetLowering::computeKnownBitsForFrameIndex
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
Definition:TargetLowering.cpp:3795
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:10050
llvm::TargetLowering::ComputeNumSignBitsForTargetNode
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
Definition:TargetLowering.cpp:3809
llvm::TargetLowering::lowerCmpEqZeroToCtlzSrl
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:10566
llvm::TargetLowering::computeNumSignBitsForTargetInstr
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
Definition:TargetLowering.cpp:3822
llvm::TargetLowering::expandVPBSWAP
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
Definition:TargetLowering.cpp:9761
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition:TargetLowering.cpp:292
llvm::TargetLowering::forceExpandWideMUL
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
Definition:TargetLowering.cpp:10918
llvm::TargetLowering::makeLibCall
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Definition:TargetLowering.cpp:147
llvm::TargetLowering::expandVecReduceSeq
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
Definition:TargetLowering.cpp:11432
llvm::TargetLowering::expandCTLZ
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
Definition:TargetLowering.cpp:9229
llvm::TargetLowering::expandBITREVERSE
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
Definition:TargetLowering.cpp:9837
llvm::TargetLowering::expandCTTZ
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
Definition:TargetLowering.cpp:9350
llvm::TargetLowering::expandIndirectJTBranch
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
Definition:TargetLowering.cpp:478
llvm::TargetLowering::expandABD
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
Definition:TargetLowering.cpp:9547
llvm::TargetLowering::computeKnownAlignForTargetInstr
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
Definition:TargetLowering.cpp:3801
llvm::TargetLowering::AsmOperandInfoVector
std::vector< AsmOperandInfo > AsmOperandInfoVector
Definition:TargetLowering.h:5008
llvm::TargetLowering::expandShlSat
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
Definition:TargetLowering.cpp:10821
llvm::TargetLowering::expandIS_FPCLASS
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
Definition:TargetLowering.cpp:8724
llvm::TargetLowering::expandFP_TO_INT_SAT
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
Definition:TargetLowering.cpp:11483
llvm::TargetLowering::SimplifyMultipleUseDemandedBits
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
Definition:TargetLowering.cpp:675
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition:TargetLowering.cpp:10277
llvm::TargetLowering::SimplifyMultipleUseDemandedVectorElts
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
Definition:TargetLowering.cpp:977
llvm::TargetLowering::findOptimalMemOpLowering
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
Definition:TargetLowering.cpp:200
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition:TargetLowering.cpp:5525
llvm::TargetLowering::expandSADDSUBO
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
Definition:TargetLowering.cpp:11272
llvm::TargetLowering::expandVPBITREVERSE
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
Definition:TargetLowering.cpp:9898
llvm::TargetLowering::expandABS
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
Definition:TargetLowering.cpp:9492
llvm::TargetLowering::expandVecReduce
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
Definition:TargetLowering.cpp:11392
llvm::TargetLowering::ShrinkDemandedConstant
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
Definition:TargetLowering.cpp:514
llvm::TargetLowering::expandVPCTTZElements
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
Definition:TargetLowering.cpp:9422
llvm::TargetLowering::BuildSDIV
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
Definition:TargetLowering.cpp:6331
llvm::TargetLowering::getTargetNodeName
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
Definition:TargetLowering.cpp:43
llvm::TargetLowering::expandFP_TO_UINT
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
Definition:TargetLowering.cpp:8301
llvm::TargetLowering::parametersInCSRMatch
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
Definition:TargetLowering.cpp:83
llvm::TargetLowering::SimplifyDemandedVectorEltsForTargetNode
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
Definition:TargetLowering.cpp:3828
llvm::TargetLowering::expandREM
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
Definition:TargetLowering.cpp:11459
llvm::TargetLowering::expandUnalignedLoad
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
Definition:TargetLowering.cpp:10128
llvm::TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
Definition:TargetLowering.cpp:8612
llvm::TargetLowering::forceExpandMultiply
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
Definition:TargetLowering.cpp:10860
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition:TargetLowering.cpp:10526
llvm::TargetLowering::expandVectorSplice
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
Definition:TargetLowering.cpp:11715
llvm::TargetLowering::LowerXConstraint
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
Definition:TargetLowering.cpp:5571
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition:TargetLowering.h:4960
llvm::TargetLowering::expandCTPOP
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
Definition:TargetLowering.cpp:9083
llvm::TargetLowering::BuildUDIV
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
Definition:TargetLowering.cpp:6499
llvm::TargetLowering::expandVectorNaryOpBySplitting
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:12086
llvm::TargetLowering::expandBSWAP
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
Definition:TargetLowering.cpp:9703
llvm::TargetLowering::expandFMINIMUM_FMAXIMUM
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
Definition:TargetLowering.cpp:8545
llvm::TargetLowering::CTTZTableLookup
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
Definition:TargetLowering.cpp:9306
llvm::TargetLowering::isKnownNeverNaNForTargetNode
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known never to be any NaN; if SNaN is true, returns whether Op is known never to be a signaling NaN.
Definition:TargetLowering.cpp:3921
llvm::TargetLowering::expandDIVREMByConstant
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
Definition:TargetLowering.cpp:7815
llvm::TargetLowering::getVectorSubVecPointer
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
Definition:TargetLowering.cpp:10492
llvm::TargetLowering::computeKnownBitsForTargetNode
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition:TargetLowering.cpp:3774
llvm::TargetLowering::isPositionIndependent
bool isPositionIndependent() const
Definition:TargetLowering.cpp:47
llvm::TargetLowering::ConstraintPair
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
Definition:TargetLowering.h:5039
llvm::TargetLowering::getNegatedExpression
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition:TargetLowering.cpp:7295
llvm::TargetLowering::getSingleConstraintMatchWeight
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Definition:TargetLowering.cpp:5969
llvm::TargetLowering::getSqrtInputTest
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
Definition:TargetLowering.cpp:7270
llvm::TargetLowering::getConstraintPreferences
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
Definition:TargetLowering.cpp:6038
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition:TargetLowering.cpp:8230
llvm::TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
Definition:TargetLowering.cpp:3853
llvm::TargetLowering::LowerAsmOutputForConstraint
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:5579
llvm::TargetLowering::buildLegalVectorShuffle
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
Definition:TargetLowering.cpp:3867
llvm::TargetLowering::getPICJumpTableRelocBase
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
Definition:TargetLowering.cpp:457
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition:TargetLowering.cpp:9961
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition:TargetLowering.cpp:5669
llvm::TargetLowering::SimplifyDemandedBits
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Definition:TargetLowering.cpp:1134
llvm::TargetLowering::SimplifyDemandedBitsForTargetNode
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
Definition:TargetLowering.cpp:3840
llvm::TargetLowering::TargetLowering
TargetLowering(const TargetLowering &)=delete
llvm::TargetLowering::isConstFalseVal
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
Definition:TargetLowering.cpp:3981
llvm::TargetLowering::IncrementMemoryAddress
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
Definition:TargetLowering.cpp:10411
llvm::TargetLowering::verifyReturnAddressArgumentIsConstant
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:7260
llvm::TargetLowering::isInTailCallPosition
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
Definition:TargetLowering.cpp:53
llvm::TargetLowering::ParseConstraints
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
Definition:TargetLowering.cpp:5731
llvm::TargetLowering::isSplatValueForTargetNode
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
Definition:TargetLowering.cpp:3934
llvm::TargetLowering::expandRoundInexactToOdd
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
Definition:TargetLowering.cpp:11593
llvm::TargetLowering::SimplifySetCC
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
Definition:TargetLowering.cpp:4505
llvm::TargetLowering::expandFunnelShift
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
Definition:TargetLowering.cpp:8034
llvm::TargetLowering::isOffsetFoldingLegal
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition:TargetLowering.cpp:490
llvm::TargetLowering::LegalizeSetCCCondCode
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
Definition:TargetLowering.cpp:11893
llvm::TargetLowering::isExtendedTrueVal
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
Definition:TargetLowering.cpp:4005
llvm::TargetLowering::ShrinkDemandedOp
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
Definition:TargetLowering.cpp:575
llvm::TargetLowering::isConstTrueVal
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
Definition:TargetLowering.cpp:3951
llvm::TargetLowering::expandVPCTPOP
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
Definition:TargetLowering.cpp:9160
llvm::TargetLowering::expandFixedPointDiv
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
Definition:TargetLowering.cpp:11141
llvm::TargetLowering::getVectorElementPointer
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
Definition:TargetLowering.cpp:10483
llvm::TargetLowering::ComputeConstraintToUse
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
Definition:TargetLowering.cpp:6088
llvm::TargetLowering::CollectTargetIntrinsicOperands
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
Definition:TargetLowering.cpp:5664
llvm::TargetLowering::expandVPCTTZ
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
Definition:TargetLowering.cpp:9406
llvm::TargetLowering::expandVECTOR_COMPRESS
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
Definition:TargetLowering.cpp:11789
llvm::TargetLowering::getTargetConstantFromLoad
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
Definition:TargetLowering.cpp:3883
llvm::TargetLowering::expandFP_ROUND
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
Definition:TargetLowering.cpp:11661
llvm::TargetLowering::createSelectForFMINNUM_FMAXNUM
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
Definition:TargetLowering.cpp:8463
llvm::TargetLowering::expandROT
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
Definition:TargetLowering.cpp:8123
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition:TargetLowering.cpp:5587
llvm::TargetLowering::expandFMINNUM_FMAXNUM
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
Definition:TargetLowering.cpp:8489
llvm::TargetLowering::isGAPlusOffset
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
Definition:TargetLowering.cpp:5484
llvm::TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
Definition:TargetLowering.cpp:3887
llvm::TargetLowering::getJumpTableEncoding
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Definition:TargetLowering.cpp:444
llvm::TargetLowering::expandCMP
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
Definition:TargetLowering.cpp:10786
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition:TargetLowering.cpp:8178
llvm::TargetLowering::PerformDAGCombine
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition:TargetLowering.cpp:5514
llvm::TargetLowering::canCreateUndefOrPoisonForTargetNode
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Definition:TargetLowering.cpp:3908
llvm::TargetLowering::expandFixedPointMul
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
Definition:TargetLowering.cpp:10984
llvm::TargetLowering::expandIntMINMAX
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
Definition:TargetLowering.cpp:10589
llvm::TargetLowering::expandVectorFindLastActive
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
Definition:TargetLowering.cpp:9455
llvm::TargetLowering::computeKnownBitsForTargetInstr
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition:TargetLowering.cpp:3788
llvm::TargetLowering::expandUADDSUBO
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
Definition:TargetLowering.cpp:11226
llvm::TargetLowering::BuildSDIVPow2
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Definition:TargetLowering.cpp:6265
llvm::TargetLowering::BuildSREMPow2
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
Definition:TargetLowering.cpp:6275
llvm::TargetLowering::expandUINT_TO_FP
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
Definition:TargetLowering.cpp:8403
llvm::TargetLowering::expandMUL_LOHI
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
Definition:TargetLowering.cpp:7603
llvm::TargetLowering::expandAVG
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Definition:TargetLowering.cpp:9621
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition:TargetMachine.h:77
llvm::TargetMachine::isPositionIndependent
bool isPositionIndependent() const
Definition:TargetMachine.cpp:117
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition:TargetMachine.h:126
llvm::TargetMachine::Options
TargetOptions Options
Definition:TargetMachine.h:118
llvm::TargetOptions
Definition:TargetOptions.h:132
llvm::TargetRegisterClass
Definition:TargetRegisterInfo.h:44
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition:TargetRegisterInfo.h:235
llvm::TargetRegisterInfo::regclasses
iterator_range< regclass_iterator > regclasses() const
Definition:TargetRegisterInfo.h:835
llvm::TargetRegisterInfo::getRegAsmName
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
Definition:TargetRegisterInfo.h:1126
llvm::TargetRegisterInfo::isTypeLegalForClass
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
Definition:TargetRegisterInfo.h:311
llvm::Triple::isOSBinFormatCOFF
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition:Triple.h:755
llvm::TypeSize
Definition:TypeSize.h:334
llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition:TypeSize.h:345
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
llvm::Type::isSingleValueType
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition:Type.h:295
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition:Type.h:310
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition:Type.h:128
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition:Type.h:237
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition:Value.h:255
llvm::Value::stripPointerCastsAndAliases
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition:Value.cpp:698
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition:Value.cpp:309
llvm::details::FixedOrScalableQuantity::isKnownMultipleOf
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition:TypeSize.h:183
llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition:TypeSize.h:202
llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition:TypeSize.h:171
llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition:TypeSize.h:168
uint16_t
uint32_t
uint64_t
unsigned
ErrorHandling.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition:ErrorHandling.h:143
TargetMachine.h
llvm::APIntOps::ScaleBitMask
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition:APInt.cpp:2982
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition:BitmaskEnum.h:125
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition:CallingConv.h:41
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition:ISDOpcodes.h:40
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition:ISDOpcodes.h:780
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition:ISDOpcodes.h:243
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition:ISDOpcodes.h:753
llvm::ISD::STORE
@ STORE
Definition:ISDOpcodes.h:1103
llvm::ISD::FP_TO_BF16
@ FP_TO_BF16
Definition:ISDOpcodes.h:974
llvm::ISD::SREM
@ SREM
Definition:ISDOpcodes.h:251
llvm::ISD::FGETSIGN
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition:ISDOpcodes.h:512
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition:ISDOpcodes.h:257
llvm::ISD::UDIV
@ UDIV
Definition:ISDOpcodes.h:250
llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition:ISDOpcodes.h:574
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition:ISDOpcodes.h:842
llvm::ISD::UMIN
@ UMIN
Definition:ISDOpcodes.h:699
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition:ISDOpcodes.h:744
llvm::ISD::ROTR
@ ROTR
Definition:ISDOpcodes.h:739
llvm::ISD::SMULFIX
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition:ISDOpcodes.h:374
llvm::ISD::ConstantFP
@ ConstantFP
Definition:ISDOpcodes.h:77
llvm::ISD::UADDO
@ UADDO
Definition:ISDOpcodes.h:331
llvm::ISD::SDIV
@ SDIV
Definition:ISDOpcodes.h:249
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition:ISDOpcodes.h:276
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition:ISDOpcodes.h:502
llvm::ISD::FMAXNUM_IEEE
@ FMAXNUM_IEEE
Definition:ISDOpcodes.h:1045
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition:ISDOpcodes.h:246
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition:ISDOpcodes.h:1102
llvm::ISD::SMULFIXSAT
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition:ISDOpcodes.h:380
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition:ISDOpcodes.h:814
llvm::ISD::FSUB
@ FSUB
Definition:ISDOpcodes.h:398
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition:ISDOpcodes.h:498
llvm::ISD::UMULFIX
@ UMULFIX
Definition:ISDOpcodes.h:375
llvm::ISD::FABS
@ FABS
Definition:ISDOpcodes.h:982
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition:ISDOpcodes.h:205
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition:ISDOpcodes.h:841
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition:ISDOpcodes.h:558
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition:ISDOpcodes.h:397
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition:ISDOpcodes.h:717
llvm::ISD::SIGN_EXTEND_VECTOR_INREG
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition:ISDOpcodes.h:871
llvm::ISD::UDIVREM
@ UDIVREM
Definition:ISDOpcodes.h:263
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition:ISDOpcodes.h:262
llvm::ISD::SRL
@ SRL
Definition:ISDOpcodes.h:737
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition:ISDOpcodes.h:1051
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition:ISDOpcodes.h:954
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition:ISDOpcodes.h:236
llvm::ISD::SDIVFIX
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition:ISDOpcodes.h:387
llvm::ISD::BUILTIN_OP_END
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition:ISDOpcodes.h:1494
llvm::ISD::UCMP
@ UCMP
Definition:ISDOpcodes.h:706
llvm::ISD::SRA
@ SRA
Definition:ISDOpcodes.h:736
llvm::ISD::USUBO
@ USUBO
Definition:ISDOpcodes.h:335
llvm::ISD::AVGFLOORU
@ AVGFLOORU
Definition:ISDOpcodes.h:681
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition:ISDOpcodes.h:805
llvm::ISD::AVGCEILS
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition:ISDOpcodes.h:685
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition:ISDOpcodes.h:635
llvm::ISD::USHLSAT
@ USHLSAT
Definition:ISDOpcodes.h:367
llvm::ISD::UDIVFIXSAT
@ UDIVFIXSAT
Definition:ISDOpcodes.h:394
llvm::ISD::UADDSAT
@ UADDSAT
Definition:ISDOpcodes.h:348
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition:ISDOpcodes.h:752
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition:ISDOpcodes.h:1032
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition:ISDOpcodes.h:981
llvm::ISD::CTTZ
@ CTTZ
Definition:ISDOpcodes.h:745
llvm::ISD::SSUBO
@ SSUBO
Same for subtraction.
Definition:ISDOpcodes.h:334
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition:ISDOpcodes.h:888
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition:ISDOpcodes.h:1123
llvm::ISD::OR
@ OR
Definition:ISDOpcodes.h:710
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition:ISDOpcodes.h:515
llvm::ISD::IS_FPCLASS
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition:ISDOpcodes.h:522
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition:ISDOpcodes.h:356
llvm::ISD::UMULO
@ UMULO
Definition:ISDOpcodes.h:339
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition:ISDOpcodes.h:795
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition:ISDOpcodes.h:757
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition:ISDOpcodes.h:258
llvm::ISD::VECREDUCE_UMAX
@ VECREDUCE_UMAX
Definition:ISDOpcodes.h:1451
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition:ISDOpcodes.h:229
llvm::ISD::SPLAT_VECTOR
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition:ISDOpcodes.h:642
llvm::ISD::FSHL
@ FSHL
Definition:ISDOpcodes.h:740
llvm::ISD::AVGCEILU
@ AVGCEILU
Definition:ISDOpcodes.h:686
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition:ISDOpcodes.h:215
llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition:ISDOpcodes.h:330
llvm::ISD::FSHR
@ FSHR
Definition:ISDOpcodes.h:741
llvm::ISD::USUBSAT
@ USUBSAT
Definition:ISDOpcodes.h:357
llvm::ISD::VECREDUCE_ADD
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition:ISDOpcodes.h:1444
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition:ISDOpcodes.h:674
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition:ISDOpcodes.h:735
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition:ISDOpcodes.h:615
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition:ISDOpcodes.h:588
llvm::ISD::FMINNUM_IEEE
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition:ISDOpcodes.h:1044
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition:ISDOpcodes.h:439
llvm::ISD::XOR
@ XOR
Definition:ISDOpcodes.h:711
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition:ISDOpcodes.h:550
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition:ISDOpcodes.h:811
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition:ISDOpcodes.h:440
llvm::ISD::CTPOP
@ CTPOP
Definition:ISDOpcodes.h:747
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition:ISDOpcodes.h:772
llvm::ISD::FMUL
@ FMUL
Definition:ISDOpcodes.h:399
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition:ISDOpcodes.h:1031
llvm::ISD::SUB
@ SUB
Definition:ISDOpcodes.h:247
llvm::ISD::MULHS
@ MULHS
Definition:ISDOpcodes.h:675
llvm::ISD::SSHLSAT
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition:ISDOpcodes.h:366
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition:ISDOpcodes.h:338
llvm::ISD::PARITY
@ PARITY
Definition:ISDOpcodes.h:749
llvm::ISD::ANY_EXTEND_VECTOR_INREG
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition:ISDOpcodes.h:860
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition:ISDOpcodes.h:849
llvm::ISD::UDIVFIX
@ UDIVFIX
Definition:ISDOpcodes.h:388
llvm::ISD::UMULFIXSAT
@ UMULFIXSAT
Definition:ISDOpcodes.h:381
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition:ISDOpcodes.h:697
llvm::ISD::Constant
@ Constant
Definition:ISDOpcodes.h:76
llvm::ISD::SDIVFIXSAT
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition:ISDOpcodes.h:393
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition:ISDOpcodes.h:939
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition:ISDOpcodes.h:766
llvm::ISD::UADDO_CARRY
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition:ISDOpcodes.h:310
llvm::ISD::FDIV
@ FDIV
Definition:ISDOpcodes.h:400
llvm::ISD::FREM
@ FREM
Definition:ISDOpcodes.h:401
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition:ISDOpcodes.h:457
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition:ISDOpcodes.h:1050
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition:ISDOpcodes.h:887
llvm::ISD::TargetConstant
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition:ISDOpcodes.h:164
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition:ISDOpcodes.h:709
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition:ISDOpcodes.h:190
llvm::ISD::USUBO_CARRY
@ USUBO_CARRY
Definition:ISDOpcodes.h:311
llvm::ISD::AVGFLOORS
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition:ISDOpcodes.h:680
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition:ISDOpcodes.h:286
llvm::ISD::UREM
@ UREM
Definition:ISDOpcodes.h:252
llvm::ISD::FREEZE
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition:ISDOpcodes.h:223
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition:ISDOpcodes.h:539
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition:ISDOpcodes.h:52
llvm::ISD::FSIN
@ FSIN
Definition:ISDOpcodes.h:985
llvm::ISD::VECTOR_SPLICE
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition:ISDOpcodes.h:627
llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition:ISDOpcodes.h:408
llvm::ISD::MUL
@ MUL
Definition:ISDOpcodes.h:248
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition:ISDOpcodes.h:920
llvm::ISD::CTLZ
@ CTLZ
Definition:ISDOpcodes.h:746
llvm::ISD::FMAXIMUMNUM
@ FMAXIMUMNUM
Definition:ISDOpcodes.h:1056
llvm::ISD::ZERO_EXTEND_VECTOR_INREG
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition:ISDOpcodes.h:882
llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition:ISDOpcodes.h:906
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition:ISDOpcodes.h:817
llvm::ISD::ROTL
@ ROTL
Definition:ISDOpcodes.h:738
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition:ISDOpcodes.h:794
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition:ISDOpcodes.h:748
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition:ISDOpcodes.h:347
llvm::ISD::AssertZext
@ AssertZext
Definition:ISDOpcodes.h:62
llvm::ISD::SMAX
@ SMAX
Definition:ISDOpcodes.h:698
llvm::ISD::UMAX
@ UMAX
Definition:ISDOpcodes.h:700
llvm::ISD::FMINIMUMNUM
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition:ISDOpcodes.h:1055
llvm::ISD::ABDS
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition:ISDOpcodes.h:692
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition:ISDOpcodes.h:198
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition:ISDOpcodes.h:530
llvm::ISD::isBuildVectorOfConstantSDNodes
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
Definition:SelectionDAG.cpp:287
llvm::ISD::getExtForLoadExtType
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
Definition:SelectionDAG.cpp:601
llvm::ISD::matchUnaryPredicate
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
Definition:SelectionDAGNodes.h:3267
llvm::ISD::isZEXTLoad
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
Definition:SelectionDAGNodes.h:3233
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition:SelectionDAG.cpp:639
llvm::ISD::isTrueWhenEqual
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition:ISDOpcodes.h:1668
llvm::ISD::getUnorderedFlavor
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition:ISDOpcodes.h:1673
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition:SelectionDAG.cpp:616
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition:SelectionDAG.cpp:283
llvm::ISD::isSignedIntSetCC
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition:ISDOpcodes.h:1643
llvm::ISD::isConstantSplatVector
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
Definition:SelectionDAG.cpp:153
llvm::ISD::matchBinaryPredicate
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
Definition:SelectionDAG.cpp:396
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition:ISDOpcodes.h:1559
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition:ISDOpcodes.h:1610
llvm::ISD::SETOEQ
@ SETOEQ
Definition:ISDOpcodes.h:1613
llvm::ISD::SETUNE
@ SETUNE
Definition:ISDOpcodes.h:1626
llvm::ISD::SETUEQ
@ SETUEQ
Definition:ISDOpcodes.h:1621
llvm::ISD::SETOLE
@ SETOLE
Definition:ISDOpcodes.h:1617
llvm::ISD::SETOLT
@ SETOLT
Definition:ISDOpcodes.h:1616
llvm::ISD::SETNE
@ SETNE
Definition:ISDOpcodes.h:1635
llvm::ISD::SETUGT
@ SETUGT
Definition:ISDOpcodes.h:1622
llvm::ISD::SETOGT
@ SETOGT
Definition:ISDOpcodes.h:1614
llvm::ISD::SETULT
@ SETULT
Definition:ISDOpcodes.h:1624
llvm::ISD::SETUO
@ SETUO
Definition:ISDOpcodes.h:1620
llvm::ISD::SETONE
@ SETONE
Definition:ISDOpcodes.h:1618
llvm::ISD::SETGT
@ SETGT
Definition:ISDOpcodes.h:1631
llvm::ISD::SETLT
@ SETLT
Definition:ISDOpcodes.h:1633
llvm::ISD::SETO
@ SETO
Definition:ISDOpcodes.h:1619
llvm::ISD::SETGE
@ SETGE
Definition:ISDOpcodes.h:1632
llvm::ISD::SETUGE
@ SETUGE
Definition:ISDOpcodes.h:1623
llvm::ISD::SETLE
@ SETLE
Definition:ISDOpcodes.h:1634
llvm::ISD::SETULE
@ SETULE
Definition:ISDOpcodes.h:1625
llvm::ISD::SETOGE
@ SETOGE
Definition:ISDOpcodes.h:1615
llvm::ISD::SETEQ
@ SETEQ
Definition:ISDOpcodes.h:1630
llvm::ISD::SETCC_INVALID
@ SETCC_INVALID
Definition:ISDOpcodes.h:1638
llvm::ISD::getVecReduceBaseOpcode
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
Definition:SelectionDAG.cpp:448
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition:ISDOpcodes.h:1590
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition:ISDOpcodes.h:1590
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition:ISDOpcodes.h:1590
llvm::M68k::MemAddrModeKind::V
@ V
llvm::M68k::MemAddrModeKind::K
@ K
llvm::RISCVFenceField::W
@ W
Definition:RISCVBaseInfo.h:374
llvm::RISCVFenceField::R
@ R
Definition:RISCVBaseInfo.h:373
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition:RuntimeLibcalls.h:33
llvm::SystemZISD::XC
@ XC
Definition:SystemZISelLowering.h:124
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::dxil::ElementType::I1
@ I1
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::Offset
@ Offset
Definition:DWP.cpp:480
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1739
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition:SelectionDAG.cpp:12205
llvm::Depth
@ Depth
Definition:SIMachineScheduler.h:36
llvm::peekThroughBitcasts
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
Definition:SelectionDAG.cpp:12297
llvm::LoopIdiomVectorizeStyle::Masked
@ Masked
llvm::invertFPClassTestIfSimpler
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
Definition:CodeGenCommonISel.cpp:176
llvm::DiagnosticPredicateTy::Match
@ Match
llvm::FloatStyle::Exponent
@ Exponent
llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition:MathExtras.h:556
llvm::bit_ceil
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition:bit.h:342
llvm::isConstOrConstSplatFP
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
Definition:SelectionDAG.cpp:12377
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition:STLExtras.h:1746
llvm::getShuffleDemandedElts
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
Definition:VectorUtils.cpp:373
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition:MathExtras.h:341
llvm::isBitwiseNot
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
Definition:SelectionDAG.cpp:12321
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition:MathExtras.h:292
llvm::ComplexDeinterleavingOperation::Splat
@ Splat
llvm::FPClassTest
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
Definition:FloatingPointMode.h:239
llvm::fcInf
@ fcInf
Definition:FloatingPointMode.h:254
llvm::fcNegSubnormal
@ fcNegSubnormal
Definition:FloatingPointMode.h:246
llvm::fcPosNormal
@ fcPosNormal
Definition:FloatingPointMode.h:250
llvm::fcQNan
@ fcQNan
Definition:FloatingPointMode.h:243
llvm::fcNegZero
@ fcNegZero
Definition:FloatingPointMode.h:247
llvm::fcNegInf
@ fcNegInf
Definition:FloatingPointMode.h:244
llvm::fcFinite
@ fcFinite
Definition:FloatingPointMode.h:260
llvm::fcSubnormal
@ fcSubnormal
Definition:FloatingPointMode.h:256
llvm::fcNone
@ fcNone
Definition:FloatingPointMode.h:240
llvm::fcNegFinite
@ fcNegFinite
Definition:FloatingPointMode.h:259
llvm::fcPosZero
@ fcPosZero
Definition:FloatingPointMode.h:248
llvm::fcPosFinite
@ fcPosFinite
Definition:FloatingPointMode.h:258
llvm::fcNegNormal
@ fcNegNormal
Definition:FloatingPointMode.h:245
llvm::fcZero
@ fcZero
Definition:FloatingPointMode.h:257
llvm::fcAllFlags
@ fcAllFlags
Definition:FloatingPointMode.h:264
llvm::fcPosSubnormal
@ fcPosSubnormal
Definition:FloatingPointMode.h:249
llvm::fcPosInf
@ fcPosInf
Definition:FloatingPointMode.h:251
llvm::fcNormal
@ fcNormal
Definition:FloatingPointMode.h:255
llvm::fcNan
@ fcNan
Definition:FloatingPointMode.h:253
llvm::find_if_not
auto find_if_not(R &&Range, UnaryPredicate P)
Definition:STLExtras.h:1771
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition:Error.cpp:167
llvm::getVScaleRange
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
Definition:ValueTracking.cpp:1058
llvm::isOneOrOneSplat
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
Definition:SelectionDAG.cpp:12414
llvm::PackElem::Hi
@ Hi
llvm::PackElem::Lo
@ Lo
llvm::IRMemLocation::Other
@ Other
Any other memory.
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::RecurKind::Mul
@ Mul
Product of integers.
llvm::RecurKind::Xor
@ Xor
Bitwise or logical XOR of integers.
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::RecurKind::Add
@ Add
Sum of integers.
llvm::Op
DWARFExpression::Operation Op
Definition:DWARFExpression.cpp:22
llvm::isConstOrConstSplat
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
Definition:SelectionDAG.cpp:12331
llvm::isConstFalseVal
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition:Utils.cpp:1625
llvm::BitWidth
constexpr unsigned BitWidth
Definition:BitmaskEnum.h:217
llvm::isOneConstant
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Definition:SelectionDAG.cpp:12224
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition:Alignment.h:212
llvm::isNullFPConstant
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
Definition:SelectionDAG.cpp:12214
llvm::neg
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition:APFloat.h:1540
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition:Alignment.h:208
llvm::PGSOQueryType::Test
@ Test
llvm::fltNanEncoding::AllOnes
@ AllOnes
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition:SelectionDAG.cpp:12219
llvm::NextPowerOf2
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition:MathExtras.h:383
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860
N
#define N
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition:APFloat.h:302
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition:APFloat.h:306
llvm::APFloatBase::opStatus
opStatus
IEEE-754R 7: Default exception handling.
Definition:APFloat.h:318
llvm::APFloatBase::opOverflow
@ opOverflow
Definition:APFloat.h:322
llvm::APFloatBase::opInexact
@ opInexact
Definition:APFloat.h:324
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition:Alignment.h:39
llvm::DenormalMode
Represent subnormal handling kind for floating point instruction inputs and outputs.
Definition:FloatingPointMode.h:70
llvm::DenormalMode::Input
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
Definition:FloatingPointMode.h:96
llvm::DenormalMode::PreserveSign
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
Definition:FloatingPointMode.h:80
llvm::DenormalMode::PositiveZero
@ PositiveZero
Denormals are flushed to positive zero.
Definition:FloatingPointMode.h:83
llvm::DenormalMode::IEEE
@ IEEE
IEEE-754 denormal numbers preserved.
Definition:FloatingPointMode.h:77
llvm::DenormalMode::inputsAreZero
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Definition:FloatingPointMode.h:150
llvm::EVT
Extended Value Type.
Definition:ValueTypes.h:35
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition:ValueTypes.h:390
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition:ValueTypes.h:137
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition:ValueTypes.h:74
llvm::EVT::changeTypeToInteger
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition:ValueTypes.h:121
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition:ValueTypes.h:279
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition:ValueTypes.h:295
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition:ValueTypes.h:147
llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition:ValueTypes.h:345
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition:ValueTypes.h:368
llvm::EVT::isByteSized
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition:ValueTypes.h:238
llvm::EVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition:ValueTypes.h:354
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition:ValueTypes.h:380
llvm::EVT::getHalfSizedIntegerVT
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition:ValueTypes.h:425
llvm::EVT::isPow2VectorType
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition:ValueTypes.h:465
llvm::EVT::getStoreSizeInBits
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition:ValueTypes.h:407
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition:ValueTypes.h:311
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition:ValueTypes.h:65
llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition:ValueTypes.h:376
llvm::EVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition:ValueTypes.h:181
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition:ValueTypes.h:168
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition:ValueTypes.h:318
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition:ValueTypes.cpp:210
llvm::EVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition:ValueTypes.h:174
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition:ValueTypes.h:323
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition:ValueTypes.h:157
llvm::EVT::changeVectorElementType
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition:ValueTypes.h:102
llvm::EVT::getFltSemantics
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition:ValueTypes.cpp:320
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition:ValueTypes.h:331
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition:ValueTypes.h:303
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition:ValueTypes.h:448
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition:ValueTypes.h:152
llvm::InlineAsm::ConstraintInfo
Definition:InlineAsm.h:123
llvm::InlineAsm::ConstraintInfo::Type
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition:InlineAsm.h:126
llvm::InlineAsm::ConstraintInfo::MatchingInput
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition:InlineAsm.h:136
llvm::InlineAsm::ConstraintInfo::Codes
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition:InlineAsm.h:154
llvm::InlineAsm::ConstraintInfo::multipleAlternatives
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition:InlineAsm.h:161
llvm::InlineAsm::ConstraintInfo::isIndirect
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition:InlineAsm.h:150
llvm::InlineAsm::ConstraintInfo::hasMatchingInput
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition:InlineAsm.h:140
llvm::Inverse
Definition:GraphTraits.h:123
llvm::KnownBits
Definition:KnownBits.h:23
llvm::KnownBits::makeConstant
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition:KnownBits.h:293
llvm::KnownBits::anyextOrTrunc
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition:KnownBits.h:178
llvm::KnownBits::countMinSignBits
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition:KnownBits.h:247
llvm::KnownBits::smax
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition:KnownBits.cpp:211
llvm::KnownBits::isNonNegative
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition:KnownBits.h:100
llvm::KnownBits::isZero
bool isZero() const
Returns true if value is all zero.
Definition:KnownBits.h:79
llvm::KnownBits::countMinTrailingZeros
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition:KnownBits.h:234
llvm::KnownBits::isUnknown
bool isUnknown() const
Returns true if we don't know any bits.
Definition:KnownBits.h:65
llvm::KnownBits::trunc
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition:KnownBits.h:153
llvm::KnownBits::sge
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition:KnownBits.cpp:536
llvm::KnownBits::countMaxPopulation
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition:KnownBits.h:281
llvm::KnownBits::concat
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition:KnownBits.h:225
llvm::KnownBits::getBitWidth
unsigned getBitWidth() const
Get the bit width of this value.
Definition:KnownBits.h:43
llvm::KnownBits::umax
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition:KnownBits.cpp:187
llvm::KnownBits::zext
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition:KnownBits.h:164
llvm::KnownBits::resetAll
void resetAll()
Resets the known state of all bits.
Definition:KnownBits.h:73
llvm::KnownBits::unionWith
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition:KnownBits.h:313
llvm::KnownBits::intersectWith
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition:KnownBits.h:303
llvm::KnownBits::sext
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition:KnownBits.h:172
llvm::KnownBits::countMinLeadingZeros
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition:KnownBits.h:240
llvm::KnownBits::smin
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition:KnownBits.cpp:215
llvm::KnownBits::ugt
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition:KnownBits.cpp:502
llvm::KnownBits::slt
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition:KnownBits.cpp:542
llvm::KnownBits::computeForAddSub
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition:KnownBits.cpp:60
llvm::KnownBits::ult
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition:KnownBits.cpp:518
llvm::KnownBits::ule
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition:KnownBits.cpp:522
llvm::KnownBits::isNegative
bool isNegative() const
Returns true if this value is known to be negative.
Definition:KnownBits.h:97
llvm::KnownBits::One
APInt One
Definition:KnownBits.h:25
llvm::KnownBits::mul
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition:KnownBits.cpp:804
llvm::KnownBits::anyext
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition:KnownBits.h:159
llvm::KnownBits::Zero
APInt Zero
Definition:KnownBits.h:24
llvm::KnownBits::sle
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition:KnownBits.cpp:546
llvm::KnownBits::sgt
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition:KnownBits.cpp:526
llvm::KnownBits::countMinPopulation
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition:KnownBits.h:278
llvm::KnownBits::uge
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition:KnownBits.cpp:512
llvm::KnownBits::umin
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition:KnownBits.cpp:205
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition:MachineMemOperand.h:41
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition:MachineOperand.cpp:1066
llvm::MachinePointerInfo::getUnknownStack
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
Definition:MachineOperand.cpp:1090
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition:MachineOperand.cpp:1072
llvm::MemOp
Definition:TargetLowering.h:115
llvm::MinMax
Definition:AssumeBundleQueries.h:70
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition:SelectionDAGNodes.h:381
llvm::SDNodeFlags::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition:SelectionDAGNodes.h:458
llvm::SDNodeFlags::NoWrap
@ NoWrap
Definition:SelectionDAGNodes.h:396
llvm::SDNodeFlags::Disjoint
@ Disjoint
Definition:SelectionDAGNodes.h:398
llvm::SDNodeFlags::Exact
@ Exact
Definition:SelectionDAGNodes.h:397
llvm::SDNodeFlags::NonNeg
@ NonNeg
Definition:SelectionDAGNodes.h:399
llvm::SDNodeFlags::NoSignedZeros
@ NoSignedZeros
Definition:SelectionDAGNodes.h:402
llvm::SDNodeFlags::Unpredictable
@ Unpredictable
Definition:SelectionDAGNodes.h:415
llvm::SDNodeFlags::None
@ None
Definition:SelectionDAGNodes.h:393
llvm::SDNodeFlags::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition:SelectionDAGNodes.h:459
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition:SelectionDAGNodes.h:79
llvm::SignedDivisionByConstantInfo
Magic data for optimising signed division by a constant.
Definition:DivisionByConstantInfo.h:21
llvm::SignedDivisionByConstantInfo::ShiftAmount
unsigned ShiftAmount
shift amount
Definition:DivisionByConstantInfo.h:24
llvm::SignedDivisionByConstantInfo::Magic
APInt Magic
magic number
Definition:DivisionByConstantInfo.h:23
llvm::SignedDivisionByConstantInfo::get
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
Definition:DivisionByConstantInfo.cpp:21
llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition:TargetLowering.h:4977
llvm::TargetLowering::AsmOperandInfo::ConstraintVT
MVT ConstraintVT
The ValueType for the operand value.
Definition:TargetLowering.h:4993
llvm::TargetLowering::AsmOperandInfo::ConstraintType
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
Definition:TargetLowering.h:4985
llvm::TargetLowering::AsmOperandInfo::ConstraintCode
std::string ConstraintCode
This contains the actual string for the code, like "m".
Definition:TargetLowering.h:4981
llvm::TargetLowering::AsmOperandInfo::CallOperandVal
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
Definition:TargetLowering.h:4990
llvm::TargetLowering::AsmOperandInfo::getMatchedOperand
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
Definition:TargetLowering.cpp:5720
llvm::TargetLowering::AsmOperandInfo::isMatchingInputConstraint
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
Definition:TargetLowering.cpp:5713
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition:TargetLowering.h:4529
llvm::TargetLowering::CallLoweringInfo::setIsPostTypeLegalization
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
Definition:TargetLowering.h:4693
llvm::TargetLowering::CallLoweringInfo::setLibCallee
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
Definition:TargetLowering.h:4583
llvm::TargetLowering::CallLoweringInfo::setDiscardResult
CallLoweringInfo & setDiscardResult(bool Value=true)
Definition:TargetLowering.h:4658
llvm::TargetLowering::CallLoweringInfo::setZExtResult
CallLoweringInfo & setZExtResult(bool Value=true)
Definition:TargetLowering.h:4673
llvm::TargetLowering::CallLoweringInfo::setDebugLoc
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
Definition:TargetLowering.h:4572
llvm::TargetLowering::CallLoweringInfo::setSExtResult
CallLoweringInfo & setSExtResult(bool Value=true)
Definition:TargetLowering.h:4668
llvm::TargetLowering::CallLoweringInfo::setNoReturn
CallLoweringInfo & setNoReturn(bool Value=true)
Definition:TargetLowering.h:4643
llvm::TargetLowering::CallLoweringInfo::setChain
CallLoweringInfo & setChain(SDValue InChain)
Definition:TargetLowering.h:4577
llvm::TargetLowering::DAGCombinerInfo
Definition:TargetLowering.h:4228
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition:TargetLowering.h:4240
llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
void AddToWorklist(SDNode *N)
Definition:DAGCombiner.cpp:916
llvm::TargetLowering::DAGCombinerInfo::isCalledByLegalizer
bool isCalledByLegalizer() const
Definition:TargetLowering.h:4243
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition:TargetLowering.h:4239
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition:TargetLowering.h:4234
llvm::TargetLowering::DAGCombinerInfo::CommitTargetLoweringOpt
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
Definition:DAGCombiner.cpp:941
llvm::TargetLowering::MakeLibCallOptions
This structure is used to pass arguments to makeLibCall function.
Definition:TargetLowering.h:4714
llvm::TargetLowering::MakeLibCallOptions::setIsPostTypeLegalization
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
Definition:TargetLowering.h:4744
llvm::TargetLowering::MakeLibCallOptions::IsSigned
bool IsSigned
Definition:TargetLowering.h:4719
llvm::TargetLowering::MakeLibCallOptions::OpsVTBeforeSoften
ArrayRef< EVT > OpsVTBeforeSoften
Definition:TargetLowering.h:4717
llvm::TargetLowering::MakeLibCallOptions::IsPostTypeLegalization
bool IsPostTypeLegalization
Definition:TargetLowering.h:4722
llvm::TargetLowering::MakeLibCallOptions::setIsSigned
MakeLibCallOptions & setIsSigned(bool Value=true)
Definition:TargetLowering.h:4729
llvm::TargetLowering::MakeLibCallOptions::IsReturnValueUsed
bool IsReturnValueUsed
Definition:TargetLowering.h:4721
llvm::TargetLowering::MakeLibCallOptions::DoesNotReturn
bool DoesNotReturn
Definition:TargetLowering.h:4720
llvm::TargetLowering::MakeLibCallOptions::setTypeListBeforeSoften
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
Definition:TargetLowering.h:4749
llvm::TargetLowering::MakeLibCallOptions::IsSoften
bool IsSoften
Definition:TargetLowering.h:4723
llvm::TargetLowering::MakeLibCallOptions::RetVTBeforeSoften
EVT RetVTBeforeSoften
Definition:TargetLowering.h:4718
llvm::TargetLowering::TargetLoweringOpt
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Definition:TargetLowering.h:3946
llvm::TargetLowering::TargetLoweringOpt::DAG
SelectionDAG & DAG
Definition:TargetLowering.h:3947
llvm::TargetLowering::TargetLoweringOpt::CombineTo
bool CombineTo(SDValue O, SDValue N)
Definition:TargetLowering.h:3960
llvm::TargetLowering::TargetLoweringOpt::LegalTypes
bool LegalTypes() const
Definition:TargetLowering.h:3957
llvm::TargetLowering::TargetLoweringOpt::New
SDValue New
Definition:TargetLowering.h:3951
llvm::TargetLowering::TargetLoweringOpt::LegalOps
bool LegalOps
Definition:TargetLowering.h:3949
llvm::TargetLowering::TargetLoweringOpt::LegalOperations
bool LegalOperations() const
Definition:TargetLowering.h:3958
llvm::UnsignedDivisionByConstantInfo
Magic data for optimising unsigned division by a constant.
Definition:DivisionByConstantInfo.h:28
llvm::UnsignedDivisionByConstantInfo::IsAdd
bool IsAdd
add indicator
Definition:DivisionByConstantInfo.h:33
llvm::UnsignedDivisionByConstantInfo::PreShift
unsigned PreShift
pre-shift amount
Definition:DivisionByConstantInfo.h:35
llvm::UnsignedDivisionByConstantInfo::get
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
Definition:DivisionByConstantInfo.cpp:74
llvm::UnsignedDivisionByConstantInfo::Magic
APInt Magic
magic number
Definition:DivisionByConstantInfo.h:32
llvm::UnsignedDivisionByConstantInfo::PostShift
unsigned PostShift
post-shift amount
Definition:DivisionByConstantInfo.h:34
llvm::fltSemantics
Definition:APFloat.cpp:103

Generated on Thu Jul 17 2025 11:45:50 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp